From 06d25926a1bc0b062d0c11ad5580f7faaeee3f0c Mon Sep 17 00:00:00 2001 From: Artom Lifshitz Date: Sat, 16 Dec 2023 11:14:30 -0500 Subject: [PATCH] Allow best effort sending of notifications In the previous patch we changed the ordering of operations during post_live_migration() to minimize guest networking downtime by activating destination host port bindings as soon as possible. Review of that patch led to the realization that exceptions during notification sending can prevent the port binding activation from happening. Instead of handling that in a localized try/catch, this patch implements a general best_effort kwarg to our two notification sending helpers to allow callers to indicate that any exceptions during notification sending should not be fatal. Change-Id: I01a15d6fffe98816ae019e67dc72784299fedfd3 --- nova/compute/manager.py | 10 +++-- nova/compute/utils.py | 22 ++++++++-- .../functional/compute/test_live_migration.py | 3 +- nova/tests/unit/compute/test_compute.py | 3 +- nova/tests/unit/compute/test_utils.py | 44 +++++++++++++++++++ 5 files changed, 72 insertions(+), 10 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 4927225b0be0..9941e0ed5a7e 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2261,11 +2261,12 @@ class ComputeManager(manager.Manager): def _notify_about_instance_usage(self, context, instance, event_suffix, network_info=None, extra_usage_info=None, - fault=None): + fault=None, best_effort=False): compute_utils.notify_about_instance_usage( self.notifier, context, instance, event_suffix, network_info=network_info, - extra_usage_info=extra_usage_info, fault=fault) + extra_usage_info=extra_usage_info, fault=fault, + best_effort=best_effort) def _deallocate_network(self, context, instance, requested_networks=None): @@ -9358,11 +9359,12 @@ class ComputeManager(manager.Manager): self._notify_about_instance_usage(ctxt, instance, "live_migration._post.start", - network_info=network_info) + network_info=network_info, + best_effort=True) compute_utils.notify_about_instance_action( ctxt, instance, self.host, action=fields.NotificationAction.LIVE_MIGRATION_POST, - phase=fields.NotificationPhase.START) + phase=fields.NotificationPhase.START, best_effort=True) migration = objects.Migration( source_compute=self.host, dest_compute=dest, diff --git a/nova/compute/utils.py b/nova/compute/utils.py index 30efc24fc794..acf3d9ccb9be 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -406,7 +406,7 @@ def notify_usage_exists(notifier, context, instance_ref, host, def notify_about_instance_usage(notifier, context, instance, event_suffix, network_info=None, extra_usage_info=None, - fault=None): + fault=None, best_effort=False): """Send an unversioned legacy notification about an instance. All new notifications should use notify_about_instance_action which sends @@ -435,7 +435,14 @@ def notify_about_instance_usage(notifier, context, instance, event_suffix, else: method = notifier.info - method(context, 'compute.instance.%s' % event_suffix, usage_info) + try: + method(context, 'compute.instance.%s' % event_suffix, usage_info) + except Exception as e: + if best_effort: + LOG.error('Exception during notification sending: %s. ' + 'Attempting to proceed with normal operation.', e) + else: + raise e def _get_fault_and_priority_from_exception(exception: Exception): @@ -454,7 +461,7 @@ def _get_fault_and_priority_from_exception(exception: Exception): @rpc.if_notifications_enabled def notify_about_instance_action(context, instance, host, action, phase=None, source=fields.NotificationSource.COMPUTE, - exception=None, bdms=None): + exception=None, bdms=None, best_effort=False): """Send versioned notification about the action made on the instance :param instance: the instance which the action performed on :param host: the host emitting the notification @@ -481,7 +488,14 @@ def notify_about_instance_action(context, instance, host, action, phase=None, action=action, phase=phase), payload=payload) - notification.emit(context) + try: + notification.emit(context) + except Exception as e: + if best_effort: + LOG.error('Exception during notification sending: %s. ' + 'Attempting to proceed with normal operation.', e) + else: + raise e @rpc.if_notifications_enabled diff --git a/nova/tests/functional/compute/test_live_migration.py b/nova/tests/functional/compute/test_live_migration.py index fb7315a23cb6..f19c091ba72b 100644 --- a/nova/tests/functional/compute/test_live_migration.py +++ b/nova/tests/functional/compute/test_live_migration.py @@ -255,7 +255,8 @@ class LiveMigrationNeutronInteractionsTest( the network_info from the instance info cache, and not Neutron. """ def stub_notify(context, instance, event_suffix, - network_info=None, extra_usage_info=None, fault=None): + network_info=None, extra_usage_info=None, fault=None, + best_effort=False): vif = network_info[0] # Make sure we have the correct VIF (the NeutronFixture # deterministically uses port_2 for networks=auto) and that the diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index ea88d5af86e8..fcfd3b5a4106 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -6675,7 +6675,8 @@ class ComputeTestCase(BaseTestCase, source_bdms=bdms) mock_notify.assert_has_calls([ mock.call(c, instance, 'fake-mini', - action='live_migration_post', phase='start'), + action='live_migration_post', phase='start', + best_effort=True), mock.call(c, instance, 'fake-mini', action='live_migration_post', phase='end')]) self.assertEqual(2, mock_notify.call_count) diff --git a/nova/tests/unit/compute/test_utils.py b/nova/tests/unit/compute/test_utils.py index dd10ecd7dfbb..dae78500141b 100644 --- a/nova/tests/unit/compute/test_utils.py +++ b/nova/tests/unit/compute/test_utils.py @@ -36,6 +36,7 @@ from nova import context from nova import exception from nova.image import glance from nova.network import model +from nova.notifications.objects import base as notifications_objects_base from nova import objects from nova.objects import base from nova.objects import block_device as block_device_obj @@ -472,6 +473,31 @@ class UsageInfoTestCase(test.TestCase): glance.generate_glance_url(self.context), uuids.fake_image_ref) self.assertEqual(payload['image_ref_url'], image_ref_url) + def test_notify_about_instance_action_best_effort(self): + instance = create_instance(self.context) + bdms = block_device_obj.block_device_make_list( + self.context, + [fake_block_device.FakeDbBlockDeviceDict( + {'source_type': 'volume', + 'device_name': '/dev/vda', + 'instance_uuid': 'f8000000-0000-0000-0000-000000000000', + 'destination_type': 'volume', + 'boot_index': 0, + 'volume_id': 'de8836ac-d75e-11e2-8271-5254009297d6'})]) + with mock.patch.object( + notifications_objects_base.NotificationBase, 'emit', + side_effect=Exception() + ) as mock_emit: + compute_utils.notify_about_instance_action( + self.context, + instance, + host='fake-compute', + action='delete', + phase='start', + bdms=bdms, + best_effort=True) + mock_emit.assert_called_once() + def test_notify_about_instance_action(self): instance = create_instance(self.context) bdms = block_device_obj.block_device_make_list( @@ -873,6 +899,24 @@ class UsageInfoTestCase(test.TestCase): self.assertEqual(200, payload['write_bytes']) self.assertEqual(200, payload['writes']) + def test_notify_about_instance_usage_best_effort(self): + instance = create_instance(self.context) + # Set some system metadata + sys_metadata = {'image_md_key1': 'val1', + 'image_md_key2': 'val2', + 'other_data': 'meow'} + instance.system_metadata.update(sys_metadata) + instance.save() + extra_usage_info = {'image_name': 'fake_name'} + notifier = rpc.get_notifier('compute') + with mock.patch.object( + notifier, 'info', side_effect=Exception() + ) as mock_info: + compute_utils.notify_about_instance_usage( + notifier, self.context, instance, 'create.start', + extra_usage_info=extra_usage_info, best_effort=True) + mock_info.assert_called_once() + def test_notify_about_instance_usage(self): instance = create_instance(self.context) # Set some system metadata