Merge "Refactor scatter-gather utility to return exception objects"

This commit is contained in:
Zuul 2018-11-03 01:08:18 +00:00 committed by Gerrit Code Review
commit 9db1c6061d
17 changed files with 61 additions and 47 deletions

View File

@ -54,8 +54,7 @@ class ConsoleAuthTokensController(wsgi.Controller):
# loop as soon as we find a result because the token is associated
# with one instance, which can only be in one cell.
for result in results.values():
if result not in (nova_context.did_not_respond_sentinel,
nova_context.raised_exception_sentinel):
if not nova_context.is_cell_failure_sentinel(result):
connect_info = result.to_dict()
break

View File

@ -536,7 +536,7 @@ class ViewBuilder(common.ViewBuilder):
objects.BlockDeviceMappingList.bdms_by_instance_uuid,
instance_uuids)
for cell_uuid, result in results.items():
if result is nova_context.raised_exception_sentinel:
if isinstance(result, Exception):
LOG.warning('Failed to get block device mappings for cell %s',
cell_uuid)
elif result is nova_context.did_not_respond_sentinel:

View File

@ -5046,8 +5046,7 @@ class HostAPI(base.Base):
service_dict = nova_context.scatter_gather_all_cells(context,
objects.ServiceList.get_all, disabled, set_zones=set_zones)
for service in service_dict.values():
if service not in (nova_context.did_not_respond_sentinel,
nova_context.raised_exception_sentinel):
if not nova_context.is_cell_failure_sentinel(service):
services.extend(service)
else:
services = objects.ServiceList.get_all(context, disabled,

View File

@ -57,8 +57,7 @@ class MigrationLister(multi_cell_list.CrossCellLister):
ctx, db.migration_get_by_uuid, marker)
db_migration = None
for result_cell_uuid, result in results.items():
if result not in (context.did_not_respond_sentinel,
context.raised_exception_sentinel):
if not context.is_cell_failure_sentinel(result):
db_migration = result
cell_uuid = result_cell_uuid
break

View File

@ -25,8 +25,6 @@ from nova import exception
from nova.i18n import _
LOG = logging.getLogger(__name__)
CELL_FAIL_SENTINELS = (context.did_not_respond_sentinel,
context.raised_exception_sentinel)
CONF = nova.conf.CONF
@ -77,9 +75,9 @@ class RecordWrapper(object):
# $limit results from good cells before we noticed the failed
# cells, and would not properly report them as failed for
# fix-up in the higher layers.
if self._db_record in CELL_FAIL_SENTINELS:
if context.is_cell_failure_sentinel(self._db_record):
return True
elif other._db_record in CELL_FAIL_SENTINELS:
elif context.is_cell_failure_sentinel(other._db_record):
return False
r = self._sort_ctx.compare_records(self._db_record,
@ -108,11 +106,11 @@ def query_wrapper(ctx, fn, *args, **kwargs):
# wrapping the sentinel indicating timeout.
yield RecordWrapper(ctx, None, context.did_not_respond_sentinel)
raise StopIteration
except Exception:
except Exception as e:
# Here, we yield a RecordWrapper (no sort_ctx needed since
# we won't call into the implementation's comparison routines)
# wrapping the sentinel indicating failure.
yield RecordWrapper(ctx, None, context.raised_exception_sentinel)
# wrapping the exception object indicating failure.
yield RecordWrapper(ctx, None, e.__class__(e.args))
raise StopIteration
@ -403,7 +401,7 @@ class CrossCellLister(object):
except StopIteration:
return
if item._db_record in CELL_FAIL_SENTINELS:
if context.is_cell_failure_sentinel(item._db_record):
if not CONF.api.list_records_by_skipping_down_cells:
raise exception.NovaException(
_('Cell %s is not responding but configuration '
@ -413,7 +411,7 @@ class CrossCellLister(object):
item.cell_uuid)
if item._db_record == context.did_not_respond_sentinel:
self._cells_timed_out.add(item.cell_uuid)
elif item._db_record == context.raised_exception_sentinel:
elif isinstance(item._db_record, Exception):
self._cells_failed.add(item.cell_uuid)
# We might have received one batch but timed out or failed
# on a later one, so be sure we fix the accounting.

View File

@ -45,9 +45,6 @@ CELL_CACHE = {}
# NOTE(melwitt): Used for the scatter-gather utility to indicate we timed out
# waiting for a result from a cell.
did_not_respond_sentinel = object()
# NOTE(melwitt): Used for the scatter-gather utility to indicate an exception
# was raised gathering a result from a cell.
raised_exception_sentinel = object()
# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
@ -429,7 +426,7 @@ def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs):
:param kwargs: The kwargs for the function to call for each cell
:returns: A dict {cell_uuid: result} containing the joined results. The
did_not_respond_sentinel will be returned if a cell did not
respond within the timeout. The raised_exception_sentinel will
respond within the timeout. The exception object will
be returned if the call to a cell raised an exception. The
exception will be logged.
"""
@ -442,9 +439,9 @@ def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs):
try:
with target_cell(context, cell_mapping) as cctxt:
result = fn(cctxt, *args, **kwargs)
except Exception:
except Exception as e:
LOG.exception('Error gathering result from cell %s', cell_uuid)
result = raised_exception_sentinel
result = e.__class__(e.args)
# The queue is already synchronized.
queue.put((cell_uuid, result))
@ -488,6 +485,11 @@ def load_cells():
LOG.error('No cells are configured, unable to continue')
def is_cell_failure_sentinel(record):
return (record is did_not_respond_sentinel or
isinstance(record, Exception))
def scatter_gather_skip_cell0(context, fn, *args, **kwargs):
"""Target all cells except cell0 in parallel and return their results.
@ -502,7 +504,7 @@ def scatter_gather_skip_cell0(context, fn, *args, **kwargs):
:param kwargs: The kwargs for the function to call for each cell
:returns: A dict {cell_uuid: result} containing the joined results. The
did_not_respond_sentinel will be returned if a cell did not
respond within the timeout. The raised_exception_sentinel will
respond within the timeout. The exception object will
be returned if the call to a cell raised an exception. The
exception will be logged.
"""
@ -527,7 +529,7 @@ def scatter_gather_single_cell(context, cell_mapping, fn, *args, **kwargs):
:param kwargs: The kwargs for the function to call for this cell
:returns: A dict {cell_uuid: result} containing the joined results. The
did_not_respond_sentinel will be returned if the cell did not
respond within the timeout. The raised_exception_sentinel will
respond within the timeout. The exception object will
be returned if the call to the cell raised an exception. The
exception will be logged.
"""
@ -549,7 +551,7 @@ def scatter_gather_all_cells(context, fn, *args, **kwargs):
:param kwargs: The kwargs for the function to call for each cell
:returns: A dict {cell_uuid: result} containing the joined results. The
did_not_respond_sentinel will be returned if a cell did not
respond within the timeout. The raised_exception_sentinel will
respond within the timeout. The exception object will
be returned if the call to a cell raised an exception. The
exception will be logged.
"""

View File

@ -519,7 +519,7 @@ def get_minimum_version_all_cells(context, binaries, require_all=False):
'service version', cell_uuid)
if require_all:
raise exception.CellTimeout()
elif result is nova_context.raised_exception_sentinel:
elif isinstance(result, Exception):
LOG.warning('Failed to get minimum service version for cell %s',
cell_uuid)
if require_all:

View File

@ -1174,8 +1174,7 @@ def _instances_cores_ram_count(context, project_id, user_id=None):
if user_id:
total_counts['user'] = {'instances': 0, 'cores': 0, 'ram': 0}
for result in results.values():
if result not in (nova_context.did_not_respond_sentinel,
nova_context.raised_exception_sentinel):
if not nova_context.is_cell_failure_sentinel(result):
for resource, count in result['project'].items():
total_counts['project'][resource] += count
if user_id:

View File

@ -630,7 +630,7 @@ class HostManager(object):
compute_nodes = collections.defaultdict(list)
services = {}
for cell_uuid, result in results.items():
if result is context_module.raised_exception_sentinel:
if isinstance(result, Exception):
LOG.warning('Failed to get computes for cell %s', cell_uuid)
elif result is context_module.did_not_respond_sentinel:
LOG.warning('Timeout getting computes for cell %s', cell_uuid)

View File

@ -892,8 +892,7 @@ def _get_instance_group_hosts_all_cells(context, instance_group):
# is raised while targeting a cell and when a cell does not respond
# as part of the "handling of a down cell" spec:
# https://blueprints.launchpad.net/nova/+spec/handling-down-cell
if result not in (nova_context.did_not_respond_sentinel,
nova_context.raised_exception_sentinel):
if not nova_context.is_cell_failure_sentinel(result):
hosts.extend(result)
return hosts

View File

@ -6803,7 +6803,7 @@ class ServersViewBuilderTest(test.TestCase):
# just faking a nova list scenario
mock_sg.return_value = {
uuids.cell1: bdms[0],
uuids.cell2: context.raised_exception_sentinel
uuids.cell2: exception.BDMNotFound(id='fake')
}
ctxt = context.RequestContext('fake', 'fake')
result = self.view_builder._get_instance_bdms_in_multiple_cells(

View File

@ -192,7 +192,7 @@ class ComputeHostAPITestCase(test.TestCase):
host='host-%s' % uuids.cell1)
mock_sg.return_value = {
uuids.cell1: [service],
uuids.cell2: context.raised_exception_sentinel
uuids.cell2: context.did_not_respond_sentinel
}
services = self.host_api.service_get_all(self.ctxt, all_cells=True)
# returns the results from cell1 and ignores cell2.

View File

@ -175,7 +175,7 @@ class TestInstanceList(test.NoDBTestCase):
# creating one up cell and two down cells
ret_val = {}
ret_val[uuids.cell0] = instances
ret_val[uuids.cell1] = [wrap(nova_context.raised_exception_sentinel)]
ret_val[uuids.cell1] = [wrap(exception.BuildRequestNotFound(uuid='f'))]
ret_val[uuids.cell2] = [wrap(nova_context.did_not_respond_sentinel)]
mock_sg.return_value = ret_val
@ -201,7 +201,7 @@ class TestInstanceList(test.NoDBTestCase):
# creating one up cell and two down cells
ret_val = {}
ret_val[uuids.cell0] = instances
ret_val[uuids.cell1] = [wrap(nova_context.raised_exception_sentinel)]
ret_val[uuids.cell1] = [wrap(exception.BuildRequestNotFound(uuid='f'))]
ret_val[uuids.cell2] = [wrap(nova_context.did_not_respond_sentinel)]
mock_sg.return_value = ret_val

View File

@ -123,7 +123,8 @@ class TestUtils(test.NoDBTestCase):
iw2 = multi_cell_list.RecordWrapper(ctx, sort_ctx,
context.did_not_respond_sentinel)
iw3 = multi_cell_list.RecordWrapper(ctx, sort_ctx,
context.raised_exception_sentinel)
exception.InstanceNotFound(
instance_id='fake'))
# NOTE(danms): The sentinel wrappers always win
self.assertTrue(iw2 < iw1)
@ -157,13 +158,15 @@ class TestUtils(test.NoDBTestCase):
mock.MagicMock(), test)])
def test_query_wrapper_fail(self):
def test(ctx):
def tester(ctx):
raise test.TestingException
self.assertEqual([context.raised_exception_sentinel],
[x._db_record for x in
multi_cell_list.query_wrapper(
mock.MagicMock(), test)])
self.assertIsInstance(
# query_wrapper is a generator so we convert to a list and
# check the type on the first and only result
[x._db_record for x in multi_cell_list.query_wrapper(
mock.MagicMock(), tester)][0],
test.TestingException)
class TestListContext(multi_cell_list.RecordSortContext):
@ -351,7 +354,7 @@ class FailureLister(TestLister):
if action == context.did_not_respond_sentinel:
raise exception.CellTimeout
elif action == context.raised_exception_sentinel:
elif isinstance(action, Exception):
raise test.TestingException
else:
return super(FailureLister, self).get_by_filters(ctx, *a, **k)
@ -369,7 +372,11 @@ class TestBaseClass(test.NoDBTestCase):
# Two of the cells will fail, one with timeout and one
# with an error
lister.set_fails(uuids.cell0, [context.did_not_respond_sentinel])
lister.set_fails(uuids.cell1, [context.raised_exception_sentinel])
# Note that InstanceNotFound exception will never appear during
# instance listing, the aim is to only simulate a situation where
# there could be some type of exception arising.
lister.set_fails(uuids.cell1, exception.InstanceNotFound(
instance_id='fake'))
ctx = context.RequestContext()
result = lister.get_records_sorted(ctx, {}, 50, None, batch_size=10)
# We should still have 50 results since there are enough from the
@ -390,7 +397,11 @@ class TestBaseClass(test.NoDBTestCase):
# One cell will succeed and then time out, one will fail immediately,
# and the last will always work
lister.set_fails(uuids.cell0, [None, context.did_not_respond_sentinel])
lister.set_fails(uuids.cell1, [context.raised_exception_sentinel])
# Note that BuildAbortException will never appear during instance
# listing, the aim is to only simulate a situation where there could
# be some type of exception arising.
lister.set_fails(uuids.cell1, exception.BuildAbortException(
instance_uuid='fake', reason='fake'))
ctx = context.RequestContext()
result = lister.get_records_sorted(ctx, {}, 50, None,
batch_size=5)

View File

@ -550,7 +550,7 @@ class TestServiceVersionCells(test.TestCase):
def test_version_all_cells_with_fail(self, mock_scatter):
mock_scatter.return_value = {
'foo': {'nova-compute': 13},
'bar': context.raised_exception_sentinel,
'bar': exception.ServiceNotFound(service_id='fake'),
}
self.assertEqual(13, service.get_minimum_version_all_cells(
self.context, ['nova-compute']))

View File

@ -1039,7 +1039,7 @@ class HostManagerTestCase(test.NoDBTestCase):
uuids.cell1: ([mock.MagicMock(host='a'), mock.MagicMock(host='b')],
[mock.sentinel.c1n1, mock.sentinel.c1n2]),
uuids.cell2: nova_context.did_not_respond_sentinel,
uuids.cell3: nova_context.raised_exception_sentinel,
uuids.cell3: exception.ComputeHostNotFound(host='c'),
}
context = nova_context.RequestContext('fake', 'fake')
cns, srv = self.host_manager._get_computes_for_cells(context, [])

View File

@ -335,6 +335,14 @@ class ContextTestCase(test.NoDBTestCase):
mock_create_cm.assert_not_called()
mock_create_tport.assert_not_called()
def test_is_cell_failure_sentinel(self):
record = context.did_not_respond_sentinel
self.assertTrue(context.is_cell_failure_sentinel(record))
record = TypeError()
self.assertTrue(context.is_cell_failure_sentinel(record))
record = objects.Instance()
self.assertFalse(context.is_cell_failure_sentinel(record))
@mock.patch('nova.context.target_cell')
@mock.patch('nova.objects.InstanceList.get_by_filters')
def test_scatter_gather_cells(self, mock_get_inst, mock_target_cell):
@ -422,7 +430,7 @@ class ContextTestCase(test.NoDBTestCase):
ctxt, mappings, 30, objects.InstanceList.get_by_filters)
self.assertEqual(2, len(results))
self.assertIn(mock.sentinel.instances, results.values())
self.assertIn(context.raised_exception_sentinel, results.values())
isinstance(results.values(), Exception)
self.assertTrue(mock_log_exception.called)
@mock.patch('nova.context.scatter_gather_cells')