Allow best effort sending of notifications

In the previous patch we changed the ordering of operations during
post_live_migration() to minimize guest networking downtime by
activating destination host port bindings as soon as possible.

Review of that patch led to the realization that exceptions during
notification sending can prevent the port binding activation from
happening. Instead of handling that in a localized try/catch, this
patch implements a general best_effort kwarg to our two notification
sending helpers to allow callers to indicate that any exceptions
during notification sending should not be fatal.

Change-Id: I01a15d6fffe98816ae019e67dc72784299fedfd3
This commit is contained in:
Artom Lifshitz 2023-12-16 11:14:30 -05:00
parent 26fbc9e8e7
commit 06d25926a1
5 changed files with 72 additions and 10 deletions

View File

@ -2261,11 +2261,12 @@ class ComputeManager(manager.Manager):
def _notify_about_instance_usage(self, context, instance, event_suffix,
network_info=None, extra_usage_info=None,
fault=None):
fault=None, best_effort=False):
compute_utils.notify_about_instance_usage(
self.notifier, context, instance, event_suffix,
network_info=network_info,
extra_usage_info=extra_usage_info, fault=fault)
extra_usage_info=extra_usage_info, fault=fault,
best_effort=best_effort)
def _deallocate_network(self, context, instance,
requested_networks=None):
@ -9358,11 +9359,12 @@ class ComputeManager(manager.Manager):
self._notify_about_instance_usage(ctxt, instance,
"live_migration._post.start",
network_info=network_info)
network_info=network_info,
best_effort=True)
compute_utils.notify_about_instance_action(
ctxt, instance, self.host,
action=fields.NotificationAction.LIVE_MIGRATION_POST,
phase=fields.NotificationPhase.START)
phase=fields.NotificationPhase.START, best_effort=True)
migration = objects.Migration(
source_compute=self.host, dest_compute=dest,

View File

@ -406,7 +406,7 @@ def notify_usage_exists(notifier, context, instance_ref, host,
def notify_about_instance_usage(notifier, context, instance, event_suffix,
network_info=None, extra_usage_info=None,
fault=None):
fault=None, best_effort=False):
"""Send an unversioned legacy notification about an instance.
All new notifications should use notify_about_instance_action which sends
@ -435,7 +435,14 @@ def notify_about_instance_usage(notifier, context, instance, event_suffix,
else:
method = notifier.info
method(context, 'compute.instance.%s' % event_suffix, usage_info)
try:
method(context, 'compute.instance.%s' % event_suffix, usage_info)
except Exception as e:
if best_effort:
LOG.error('Exception during notification sending: %s. '
'Attempting to proceed with normal operation.', e)
else:
raise e
def _get_fault_and_priority_from_exception(exception: Exception):
@ -454,7 +461,7 @@ def _get_fault_and_priority_from_exception(exception: Exception):
@rpc.if_notifications_enabled
def notify_about_instance_action(context, instance, host, action, phase=None,
source=fields.NotificationSource.COMPUTE,
exception=None, bdms=None):
exception=None, bdms=None, best_effort=False):
"""Send versioned notification about the action made on the instance
:param instance: the instance which the action performed on
:param host: the host emitting the notification
@ -481,7 +488,14 @@ def notify_about_instance_action(context, instance, host, action, phase=None,
action=action,
phase=phase),
payload=payload)
notification.emit(context)
try:
notification.emit(context)
except Exception as e:
if best_effort:
LOG.error('Exception during notification sending: %s. '
'Attempting to proceed with normal operation.', e)
else:
raise e
@rpc.if_notifications_enabled

View File

@ -255,7 +255,8 @@ class LiveMigrationNeutronInteractionsTest(
the network_info from the instance info cache, and not Neutron.
"""
def stub_notify(context, instance, event_suffix,
network_info=None, extra_usage_info=None, fault=None):
network_info=None, extra_usage_info=None, fault=None,
best_effort=False):
vif = network_info[0]
# Make sure we have the correct VIF (the NeutronFixture
# deterministically uses port_2 for networks=auto) and that the

View File

@ -6675,7 +6675,8 @@ class ComputeTestCase(BaseTestCase,
source_bdms=bdms)
mock_notify.assert_has_calls([
mock.call(c, instance, 'fake-mini',
action='live_migration_post', phase='start'),
action='live_migration_post', phase='start',
best_effort=True),
mock.call(c, instance, 'fake-mini',
action='live_migration_post', phase='end')])
self.assertEqual(2, mock_notify.call_count)

View File

@ -36,6 +36,7 @@ from nova import context
from nova import exception
from nova.image import glance
from nova.network import model
from nova.notifications.objects import base as notifications_objects_base
from nova import objects
from nova.objects import base
from nova.objects import block_device as block_device_obj
@ -472,6 +473,31 @@ class UsageInfoTestCase(test.TestCase):
glance.generate_glance_url(self.context), uuids.fake_image_ref)
self.assertEqual(payload['image_ref_url'], image_ref_url)
def test_notify_about_instance_action_best_effort(self):
instance = create_instance(self.context)
bdms = block_device_obj.block_device_make_list(
self.context,
[fake_block_device.FakeDbBlockDeviceDict(
{'source_type': 'volume',
'device_name': '/dev/vda',
'instance_uuid': 'f8000000-0000-0000-0000-000000000000',
'destination_type': 'volume',
'boot_index': 0,
'volume_id': 'de8836ac-d75e-11e2-8271-5254009297d6'})])
with mock.patch.object(
notifications_objects_base.NotificationBase, 'emit',
side_effect=Exception()
) as mock_emit:
compute_utils.notify_about_instance_action(
self.context,
instance,
host='fake-compute',
action='delete',
phase='start',
bdms=bdms,
best_effort=True)
mock_emit.assert_called_once()
def test_notify_about_instance_action(self):
instance = create_instance(self.context)
bdms = block_device_obj.block_device_make_list(
@ -873,6 +899,24 @@ class UsageInfoTestCase(test.TestCase):
self.assertEqual(200, payload['write_bytes'])
self.assertEqual(200, payload['writes'])
def test_notify_about_instance_usage_best_effort(self):
instance = create_instance(self.context)
# Set some system metadata
sys_metadata = {'image_md_key1': 'val1',
'image_md_key2': 'val2',
'other_data': 'meow'}
instance.system_metadata.update(sys_metadata)
instance.save()
extra_usage_info = {'image_name': 'fake_name'}
notifier = rpc.get_notifier('compute')
with mock.patch.object(
notifier, 'info', side_effect=Exception()
) as mock_info:
compute_utils.notify_about_instance_usage(
notifier, self.context, instance, 'create.start',
extra_usage_info=extra_usage_info, best_effort=True)
mock_info.assert_called_once()
def test_notify_about_instance_usage(self):
instance = create_instance(self.context)
# Set some system metadata