From 031314b6d87638ebaf01a42dc97196ecb10c8499 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Thu, 4 Oct 2018 13:41:49 +0200 Subject: [PATCH] Refactor scatter-gather utility to return exception objects Scatter-gather utility returns a raised_exception_sentinel for all kinds of exceptions that are caught and often times there maybe situations where we may have to handle the different types of exceptions differently. To facilitate that, it might be more useful to return the Exception object itself instead of the dummy raised_exception_sentinel so that based on the result's exception type we can handle them differently. Related to blueprint handling-down-cell Change-Id: I861b223ee46b0f0a31f646a4b45f8a02410253cf --- .../openstack/compute/console_auth_tokens.py | 3 +- nova/api/openstack/compute/views/servers.py | 2 +- nova/compute/api.py | 3 +- nova/compute/migration_list.py | 3 +- nova/compute/multi_cell_list.py | 16 +++++----- nova/context.py | 20 +++++++------ nova/objects/service.py | 2 +- nova/quota.py | 3 +- nova/scheduler/host_manager.py | 2 +- nova/scheduler/utils.py | 3 +- .../api/openstack/compute/test_serversV21.py | 2 +- nova/tests/unit/compute/test_host_api.py | 2 +- nova/tests/unit/compute/test_instance_list.py | 4 +-- .../unit/compute/test_multi_cell_list.py | 29 +++++++++++++------ nova/tests/unit/objects/test_service.py | 2 +- .../tests/unit/scheduler/test_host_manager.py | 2 +- nova/tests/unit/test_context.py | 10 ++++++- 17 files changed, 61 insertions(+), 47 deletions(-) diff --git a/nova/api/openstack/compute/console_auth_tokens.py b/nova/api/openstack/compute/console_auth_tokens.py index d7ce020f241e..a85b6a35424e 100644 --- a/nova/api/openstack/compute/console_auth_tokens.py +++ b/nova/api/openstack/compute/console_auth_tokens.py @@ -54,8 +54,7 @@ class ConsoleAuthTokensController(wsgi.Controller): # loop as soon as we find a result because the token is associated # with one instance, which can only be in one cell. for result in results.values(): - if result not in (nova_context.did_not_respond_sentinel, - nova_context.raised_exception_sentinel): + if not nova_context.is_cell_failure_sentinel(result): connect_info = result.to_dict() break diff --git a/nova/api/openstack/compute/views/servers.py b/nova/api/openstack/compute/views/servers.py index ee858b0c25ef..1eab3d61d1ce 100644 --- a/nova/api/openstack/compute/views/servers.py +++ b/nova/api/openstack/compute/views/servers.py @@ -536,7 +536,7 @@ class ViewBuilder(common.ViewBuilder): objects.BlockDeviceMappingList.bdms_by_instance_uuid, instance_uuids) for cell_uuid, result in results.items(): - if result is nova_context.raised_exception_sentinel: + if isinstance(result, Exception): LOG.warning('Failed to get block device mappings for cell %s', cell_uuid) elif result is nova_context.did_not_respond_sentinel: diff --git a/nova/compute/api.py b/nova/compute/api.py index b39ad081c39d..e10d098f303a 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -5046,8 +5046,7 @@ class HostAPI(base.Base): service_dict = nova_context.scatter_gather_all_cells(context, objects.ServiceList.get_all, disabled, set_zones=set_zones) for service in service_dict.values(): - if service not in (nova_context.did_not_respond_sentinel, - nova_context.raised_exception_sentinel): + if not nova_context.is_cell_failure_sentinel(service): services.extend(service) else: services = objects.ServiceList.get_all(context, disabled, diff --git a/nova/compute/migration_list.py b/nova/compute/migration_list.py index ca0c1ff95651..77b36c83feb2 100644 --- a/nova/compute/migration_list.py +++ b/nova/compute/migration_list.py @@ -57,8 +57,7 @@ class MigrationLister(multi_cell_list.CrossCellLister): ctx, db.migration_get_by_uuid, marker) db_migration = None for result_cell_uuid, result in results.items(): - if result not in (context.did_not_respond_sentinel, - context.raised_exception_sentinel): + if not context.is_cell_failure_sentinel(result): db_migration = result cell_uuid = result_cell_uuid break diff --git a/nova/compute/multi_cell_list.py b/nova/compute/multi_cell_list.py index 99f29a41c15a..4975d49688a0 100644 --- a/nova/compute/multi_cell_list.py +++ b/nova/compute/multi_cell_list.py @@ -25,8 +25,6 @@ from nova import exception from nova.i18n import _ LOG = logging.getLogger(__name__) -CELL_FAIL_SENTINELS = (context.did_not_respond_sentinel, - context.raised_exception_sentinel) CONF = nova.conf.CONF @@ -77,9 +75,9 @@ class RecordWrapper(object): # $limit results from good cells before we noticed the failed # cells, and would not properly report them as failed for # fix-up in the higher layers. - if self._db_record in CELL_FAIL_SENTINELS: + if context.is_cell_failure_sentinel(self._db_record): return True - elif other._db_record in CELL_FAIL_SENTINELS: + elif context.is_cell_failure_sentinel(other._db_record): return False r = self._sort_ctx.compare_records(self._db_record, @@ -108,11 +106,11 @@ def query_wrapper(ctx, fn, *args, **kwargs): # wrapping the sentinel indicating timeout. yield RecordWrapper(ctx, None, context.did_not_respond_sentinel) raise StopIteration - except Exception: + except Exception as e: # Here, we yield a RecordWrapper (no sort_ctx needed since # we won't call into the implementation's comparison routines) - # wrapping the sentinel indicating failure. - yield RecordWrapper(ctx, None, context.raised_exception_sentinel) + # wrapping the exception object indicating failure. + yield RecordWrapper(ctx, None, e.__class__(e.args)) raise StopIteration @@ -403,7 +401,7 @@ class CrossCellLister(object): except StopIteration: return - if item._db_record in CELL_FAIL_SENTINELS: + if context.is_cell_failure_sentinel(item._db_record): if not CONF.api.list_records_by_skipping_down_cells: raise exception.NovaException( _('Cell %s is not responding but configuration ' @@ -413,7 +411,7 @@ class CrossCellLister(object): item.cell_uuid) if item._db_record == context.did_not_respond_sentinel: self._cells_timed_out.add(item.cell_uuid) - elif item._db_record == context.raised_exception_sentinel: + elif isinstance(item._db_record, Exception): self._cells_failed.add(item.cell_uuid) # We might have received one batch but timed out or failed # on a later one, so be sure we fix the accounting. diff --git a/nova/context.py b/nova/context.py index 59830ff3dc1e..f290a02be7fb 100644 --- a/nova/context.py +++ b/nova/context.py @@ -45,9 +45,6 @@ CELL_CACHE = {} # NOTE(melwitt): Used for the scatter-gather utility to indicate we timed out # waiting for a result from a cell. did_not_respond_sentinel = object() -# NOTE(melwitt): Used for the scatter-gather utility to indicate an exception -# was raised gathering a result from a cell. -raised_exception_sentinel = object() # FIXME(danms): Keep a global cache of the cells we find the # first time we look. This needs to be refreshed on a timer or # trigger. @@ -429,7 +426,7 @@ def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs): :param kwargs: The kwargs for the function to call for each cell :returns: A dict {cell_uuid: result} containing the joined results. The did_not_respond_sentinel will be returned if a cell did not - respond within the timeout. The raised_exception_sentinel will + respond within the timeout. The exception object will be returned if the call to a cell raised an exception. The exception will be logged. """ @@ -442,9 +439,9 @@ def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs): try: with target_cell(context, cell_mapping) as cctxt: result = fn(cctxt, *args, **kwargs) - except Exception: + except Exception as e: LOG.exception('Error gathering result from cell %s', cell_uuid) - result = raised_exception_sentinel + result = e.__class__(e.args) # The queue is already synchronized. queue.put((cell_uuid, result)) @@ -488,6 +485,11 @@ def load_cells(): LOG.error('No cells are configured, unable to continue') +def is_cell_failure_sentinel(record): + return (record is did_not_respond_sentinel or + isinstance(record, Exception)) + + def scatter_gather_skip_cell0(context, fn, *args, **kwargs): """Target all cells except cell0 in parallel and return their results. @@ -502,7 +504,7 @@ def scatter_gather_skip_cell0(context, fn, *args, **kwargs): :param kwargs: The kwargs for the function to call for each cell :returns: A dict {cell_uuid: result} containing the joined results. The did_not_respond_sentinel will be returned if a cell did not - respond within the timeout. The raised_exception_sentinel will + respond within the timeout. The exception object will be returned if the call to a cell raised an exception. The exception will be logged. """ @@ -527,7 +529,7 @@ def scatter_gather_single_cell(context, cell_mapping, fn, *args, **kwargs): :param kwargs: The kwargs for the function to call for this cell :returns: A dict {cell_uuid: result} containing the joined results. The did_not_respond_sentinel will be returned if the cell did not - respond within the timeout. The raised_exception_sentinel will + respond within the timeout. The exception object will be returned if the call to the cell raised an exception. The exception will be logged. """ @@ -549,7 +551,7 @@ def scatter_gather_all_cells(context, fn, *args, **kwargs): :param kwargs: The kwargs for the function to call for each cell :returns: A dict {cell_uuid: result} containing the joined results. The did_not_respond_sentinel will be returned if a cell did not - respond within the timeout. The raised_exception_sentinel will + respond within the timeout. The exception object will be returned if the call to a cell raised an exception. The exception will be logged. """ diff --git a/nova/objects/service.py b/nova/objects/service.py index f761dd331cf4..0493de711bd7 100644 --- a/nova/objects/service.py +++ b/nova/objects/service.py @@ -519,7 +519,7 @@ def get_minimum_version_all_cells(context, binaries, require_all=False): 'service version', cell_uuid) if require_all: raise exception.CellTimeout() - elif result is nova_context.raised_exception_sentinel: + elif isinstance(result, Exception): LOG.warning('Failed to get minimum service version for cell %s', cell_uuid) if require_all: diff --git a/nova/quota.py b/nova/quota.py index a75abb1f2917..7386aad8f3eb 100644 --- a/nova/quota.py +++ b/nova/quota.py @@ -1338,8 +1338,7 @@ def _instances_cores_ram_count(context, project_id, user_id=None): if user_id: total_counts['user'] = {'instances': 0, 'cores': 0, 'ram': 0} for result in results.values(): - if result not in (nova_context.did_not_respond_sentinel, - nova_context.raised_exception_sentinel): + if not nova_context.is_cell_failure_sentinel(result): for resource, count in result['project'].items(): total_counts['project'][resource] += count if user_id: diff --git a/nova/scheduler/host_manager.py b/nova/scheduler/host_manager.py index 7eda6ee4d20c..bb9624a2a3f4 100644 --- a/nova/scheduler/host_manager.py +++ b/nova/scheduler/host_manager.py @@ -630,7 +630,7 @@ class HostManager(object): compute_nodes = collections.defaultdict(list) services = {} for cell_uuid, result in results.items(): - if result is context_module.raised_exception_sentinel: + if isinstance(result, Exception): LOG.warning('Failed to get computes for cell %s', cell_uuid) elif result is context_module.did_not_respond_sentinel: LOG.warning('Timeout getting computes for cell %s', cell_uuid) diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py index bcb5beb540a6..5022b44f0fd0 100644 --- a/nova/scheduler/utils.py +++ b/nova/scheduler/utils.py @@ -876,8 +876,7 @@ def _get_instance_group_hosts_all_cells(context, instance_group): # is raised while targeting a cell and when a cell does not respond # as part of the "handling of a down cell" spec: # https://blueprints.launchpad.net/nova/+spec/handling-down-cell - if result not in (nova_context.did_not_respond_sentinel, - nova_context.raised_exception_sentinel): + if not nova_context.is_cell_failure_sentinel(result): hosts.extend(result) return hosts diff --git a/nova/tests/unit/api/openstack/compute/test_serversV21.py b/nova/tests/unit/api/openstack/compute/test_serversV21.py index e44a8ea7a74b..3dffa8684a16 100644 --- a/nova/tests/unit/api/openstack/compute/test_serversV21.py +++ b/nova/tests/unit/api/openstack/compute/test_serversV21.py @@ -6803,7 +6803,7 @@ class ServersViewBuilderTest(test.TestCase): # just faking a nova list scenario mock_sg.return_value = { uuids.cell1: bdms[0], - uuids.cell2: context.raised_exception_sentinel + uuids.cell2: exception.BDMNotFound(id='fake') } ctxt = context.RequestContext('fake', 'fake') result = self.view_builder._get_instance_bdms_in_multiple_cells( diff --git a/nova/tests/unit/compute/test_host_api.py b/nova/tests/unit/compute/test_host_api.py index 48e2dd97470c..bac58ed73261 100644 --- a/nova/tests/unit/compute/test_host_api.py +++ b/nova/tests/unit/compute/test_host_api.py @@ -192,7 +192,7 @@ class ComputeHostAPITestCase(test.TestCase): host='host-%s' % uuids.cell1) mock_sg.return_value = { uuids.cell1: [service], - uuids.cell2: context.raised_exception_sentinel + uuids.cell2: context.did_not_respond_sentinel } services = self.host_api.service_get_all(self.ctxt, all_cells=True) # returns the results from cell1 and ignores cell2. diff --git a/nova/tests/unit/compute/test_instance_list.py b/nova/tests/unit/compute/test_instance_list.py index ca77a4350968..25c0a4c4f733 100644 --- a/nova/tests/unit/compute/test_instance_list.py +++ b/nova/tests/unit/compute/test_instance_list.py @@ -175,7 +175,7 @@ class TestInstanceList(test.NoDBTestCase): # creating one up cell and two down cells ret_val = {} ret_val[uuids.cell0] = instances - ret_val[uuids.cell1] = [wrap(nova_context.raised_exception_sentinel)] + ret_val[uuids.cell1] = [wrap(exception.BuildRequestNotFound(uuid='f'))] ret_val[uuids.cell2] = [wrap(nova_context.did_not_respond_sentinel)] mock_sg.return_value = ret_val @@ -201,7 +201,7 @@ class TestInstanceList(test.NoDBTestCase): # creating one up cell and two down cells ret_val = {} ret_val[uuids.cell0] = instances - ret_val[uuids.cell1] = [wrap(nova_context.raised_exception_sentinel)] + ret_val[uuids.cell1] = [wrap(exception.BuildRequestNotFound(uuid='f'))] ret_val[uuids.cell2] = [wrap(nova_context.did_not_respond_sentinel)] mock_sg.return_value = ret_val diff --git a/nova/tests/unit/compute/test_multi_cell_list.py b/nova/tests/unit/compute/test_multi_cell_list.py index b96edef2940b..6bb67a76b8d6 100644 --- a/nova/tests/unit/compute/test_multi_cell_list.py +++ b/nova/tests/unit/compute/test_multi_cell_list.py @@ -123,7 +123,8 @@ class TestUtils(test.NoDBTestCase): iw2 = multi_cell_list.RecordWrapper(ctx, sort_ctx, context.did_not_respond_sentinel) iw3 = multi_cell_list.RecordWrapper(ctx, sort_ctx, - context.raised_exception_sentinel) + exception.InstanceNotFound( + instance_id='fake')) # NOTE(danms): The sentinel wrappers always win self.assertTrue(iw2 < iw1) @@ -157,13 +158,15 @@ class TestUtils(test.NoDBTestCase): mock.MagicMock(), test)]) def test_query_wrapper_fail(self): - def test(ctx): + def tester(ctx): raise test.TestingException - self.assertEqual([context.raised_exception_sentinel], - [x._db_record for x in - multi_cell_list.query_wrapper( - mock.MagicMock(), test)]) + self.assertIsInstance( + # query_wrapper is a generator so we convert to a list and + # check the type on the first and only result + [x._db_record for x in multi_cell_list.query_wrapper( + mock.MagicMock(), tester)][0], + test.TestingException) class TestListContext(multi_cell_list.RecordSortContext): @@ -351,7 +354,7 @@ class FailureLister(TestLister): if action == context.did_not_respond_sentinel: raise exception.CellTimeout - elif action == context.raised_exception_sentinel: + elif isinstance(action, Exception): raise test.TestingException else: return super(FailureLister, self).get_by_filters(ctx, *a, **k) @@ -369,7 +372,11 @@ class TestBaseClass(test.NoDBTestCase): # Two of the cells will fail, one with timeout and one # with an error lister.set_fails(uuids.cell0, [context.did_not_respond_sentinel]) - lister.set_fails(uuids.cell1, [context.raised_exception_sentinel]) + # Note that InstanceNotFound exception will never appear during + # instance listing, the aim is to only simulate a situation where + # there could be some type of exception arising. + lister.set_fails(uuids.cell1, exception.InstanceNotFound( + instance_id='fake')) ctx = context.RequestContext() result = lister.get_records_sorted(ctx, {}, 50, None, batch_size=10) # We should still have 50 results since there are enough from the @@ -390,7 +397,11 @@ class TestBaseClass(test.NoDBTestCase): # One cell will succeed and then time out, one will fail immediately, # and the last will always work lister.set_fails(uuids.cell0, [None, context.did_not_respond_sentinel]) - lister.set_fails(uuids.cell1, [context.raised_exception_sentinel]) + # Note that BuildAbortException will never appear during instance + # listing, the aim is to only simulate a situation where there could + # be some type of exception arising. + lister.set_fails(uuids.cell1, exception.BuildAbortException( + instance_uuid='fake', reason='fake')) ctx = context.RequestContext() result = lister.get_records_sorted(ctx, {}, 50, None, batch_size=5) diff --git a/nova/tests/unit/objects/test_service.py b/nova/tests/unit/objects/test_service.py index f824bf88a1ad..79b945ec55c9 100644 --- a/nova/tests/unit/objects/test_service.py +++ b/nova/tests/unit/objects/test_service.py @@ -550,7 +550,7 @@ class TestServiceVersionCells(test.TestCase): def test_version_all_cells_with_fail(self, mock_scatter): mock_scatter.return_value = { 'foo': {'nova-compute': 13}, - 'bar': context.raised_exception_sentinel, + 'bar': exception.ServiceNotFound(service_id='fake'), } self.assertEqual(13, service.get_minimum_version_all_cells( self.context, ['nova-compute'])) diff --git a/nova/tests/unit/scheduler/test_host_manager.py b/nova/tests/unit/scheduler/test_host_manager.py index 2bf626d677ca..267c2b52bd9a 100644 --- a/nova/tests/unit/scheduler/test_host_manager.py +++ b/nova/tests/unit/scheduler/test_host_manager.py @@ -1039,7 +1039,7 @@ class HostManagerTestCase(test.NoDBTestCase): uuids.cell1: ([mock.MagicMock(host='a'), mock.MagicMock(host='b')], [mock.sentinel.c1n1, mock.sentinel.c1n2]), uuids.cell2: nova_context.did_not_respond_sentinel, - uuids.cell3: nova_context.raised_exception_sentinel, + uuids.cell3: exception.ComputeHostNotFound(host='c'), } context = nova_context.RequestContext('fake', 'fake') cns, srv = self.host_manager._get_computes_for_cells(context, []) diff --git a/nova/tests/unit/test_context.py b/nova/tests/unit/test_context.py index ac40f9a7cf66..b40d95bf5b11 100644 --- a/nova/tests/unit/test_context.py +++ b/nova/tests/unit/test_context.py @@ -335,6 +335,14 @@ class ContextTestCase(test.NoDBTestCase): mock_create_cm.assert_not_called() mock_create_tport.assert_not_called() + def test_is_cell_failure_sentinel(self): + record = context.did_not_respond_sentinel + self.assertTrue(context.is_cell_failure_sentinel(record)) + record = TypeError() + self.assertTrue(context.is_cell_failure_sentinel(record)) + record = objects.Instance() + self.assertFalse(context.is_cell_failure_sentinel(record)) + @mock.patch('nova.context.target_cell') @mock.patch('nova.objects.InstanceList.get_by_filters') def test_scatter_gather_cells(self, mock_get_inst, mock_target_cell): @@ -422,7 +430,7 @@ class ContextTestCase(test.NoDBTestCase): ctxt, mappings, 30, objects.InstanceList.get_by_filters) self.assertEqual(2, len(results)) self.assertIn(mock.sentinel.instances, results.values()) - self.assertIn(context.raised_exception_sentinel, results.values()) + isinstance(results.values(), Exception) self.assertTrue(mock_log_exception.called) @mock.patch('nova.context.scatter_gather_cells')