From 56d3cd7aa7f4d3be01fd2a5c10903fb548c49458 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Mon, 25 Nov 2019 13:35:44 -0800 Subject: [PATCH] Add a way to exit early from a wait_for_instance_event() In some situations, we may enter into a race-safe wait context for an external event and then determine within the context that the event would have already fired or for some other reason waiting is not required. In that case, this introduces a VirtAPI method to abort the wait for specific events in a clean way. This should be used sparingly and only in certain circumstances where it is clear that the event has already occurred. For example, if the action triggering the event was initiated outside of the wait context, we can enter the wait context (establishing a waiting event to capture), and then poll once for completion of that thing. If incomplete, we will block and wait on context exit as usual. If the thing we are waiting for has already completed, then we can avoid the long wait by calling the early-exit helper and avoid what would ultimately be a timeout and failure. We are specifically adding this to address the cyborg scenario as follows: Once we select a host in the conductor for an instance build, we can call to cyborg to start the preparation (i.e. programming) of the device on the destination host. This takes time, and by kicking it off early, we can overlap that process with other work we have to do. Because that will send an event upon completion, which will be routed to the compute, we will race to start the build process on the compute and begin the race-safe event wait in the compute manager. Instead of collapsing the window and waiting synchronously, we can trigger it early, then start the wait in the compute later, check for early completion *after* the wait has been set up, and then exit the wait early if we determine that the event had already been sent. This allows us to be async with cyborg across two services while still being safe in the critical region where we need to setup the wait before we rely on the event being sent. Change-Id: I8e5a0683069b2f322cb059d6eb501d732d1bd851 --- nova/compute/manager.py | 46 +++++++++++++++++++++++-- nova/tests/unit/compute/test_virtapi.py | 27 +++++++++++++++ nova/virt/fake.py | 4 +++ nova/virt/virtapi.py | 3 ++ 4 files changed, 77 insertions(+), 3 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 6042c16591ff..9fafdd1522d0 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -418,6 +418,22 @@ class ComputeVirtAPI(virtapi.VirtAPI): self._compute = compute self.reportclient = compute.reportclient + class ExitEarly(Exception): + def __init__(self, events): + super(Exception, self).__init__() + self.events = events + + self._exit_early_exc = ExitEarly + + def exit_wait_early(self, events): + """Exit a wait_for_instance_event() immediately and avoid + waiting for some events. + + :param: events: A list of (name, tag) tuples for events that we should + skip waiting for during a wait_for_instance_event(). + """ + raise self._exit_early_exc(events=events) + def _default_error_callback(self, event_name, instance): raise exception.NovaException(_('Instance event failed')) @@ -445,6 +461,17 @@ class ComputeVirtAPI(virtapi.VirtAPI): waiting for the rest of the events, False to stop processing, or raise an exception which will bubble up to the waiter. + If the inner code wishes to abort waiting for one or more + events because it knows some state to be finished or condition + to be satisfied, it can use VirtAPI.exit_wait_early() with a + list of event (name,tag) items to avoid waiting for those + events upon context exit. Note that exit_wait_early() exits + the context immediately and should be used to signal that all + work has been completed and provide the unified list of events + that need not be waited for. Waiting for the remaining events + will begin immediately upon early exit as if the context was + exited normally. + :param instance: The instance for which an event is expected :param event_names: A list of event names. Each element is a tuple of strings to indicate (name, tag), @@ -471,12 +498,25 @@ class ComputeVirtAPI(virtapi.VirtAPI): # should all be canceled and fired immediately below, # but don't stick around if not. deadline = 0 - yield + try: + yield + except self._exit_early_exc as e: + early_events = set([objects.InstanceExternalEvent.make_key(n, t) + for n, t in e.events]) + else: + early_events = [] + with eventlet.timeout.Timeout(deadline): for event_name, event in events.items(): - actual_event = event.wait() - if actual_event.status == 'completed': + if event_name in early_events: continue + else: + actual_event = event.wait() + if actual_event.status == 'completed': + continue + # If we get here, we have an event that was not completed, + # nor skipped via exit_wait_early(). Decide whether to + # keep waiting by calling the error_callback() hook. decision = error_callback(event_name, instance) if decision is False: break diff --git a/nova/tests/unit/compute/test_virtapi.py b/nova/tests/unit/compute/test_virtapi.py index 038072499ac0..b1993d7114fc 100644 --- a/nova/tests/unit/compute/test_virtapi.py +++ b/nova/tests/unit/compute/test_virtapi.py @@ -48,6 +48,9 @@ class VirtAPIBaseTest(test.NoDBTestCase, test.APICoverage): self.assertExpected('wait_for_instance_event', 'instance', ['event']) + def test_exit_wait_early(self): + self.assertExpected('exit_wait_early', []) + def test_update_compute_provider_status(self): self.assertExpected('update_compute_provider_status', nova_context.get_admin_context(), uuids.rp_uuid, @@ -67,6 +70,8 @@ class FakeVirtAPITest(VirtAPIBaseTest): with self.virtapi.wait_for_instance_event(*args, **kwargs): run = True self.assertTrue(run) + elif method == 'exit_wait_early': + self.virtapi.exit_wait_early(*args, **kwargs) elif method == 'update_compute_provider_status': self.virtapi.update_compute_provider_status(*args, **kwargs) else: @@ -120,6 +125,12 @@ class ComputeVirtAPITest(VirtAPIBaseTest): self.compute = FakeCompute() self.virtapi = compute_manager.ComputeVirtAPI(self.compute) + def test_exit_wait_early(self): + self.assertRaises(self.virtapi._exit_early_exc, + self.virtapi.exit_wait_early, + [('foo', 'bar'), + ('foo', 'baz')]) + def test_wait_for_instance_event(self): and_i_ran = '' event_1_tag = objects.InstanceExternalEvent.make_key( @@ -182,6 +193,22 @@ class ComputeVirtAPITest(VirtAPIBaseTest): self.assertRaises(test.TestingException, do_test) + def test_wait_for_instance_event_exit_early(self): + # Wait for two events, exit early skipping one. + # Make sure we waited for one and did not wait for the other + with self.virtapi.wait_for_instance_event('instance', + [('foo', 'bar'), + ('foo', 'baz')]): + self.virtapi.exit_wait_early([('foo', 'baz')]) + self.fail('never gonna happen') + + self.assertEqual(2, len(self.compute._events)) + for event in self.compute._events: + if event.tag == 'bar': + event.wait.assert_called_once_with() + else: + event.wait.assert_not_called() + def test_update_compute_provider_status(self): """Tests scenarios for adding/removing the COMPUTE_STATUS_DISABLED trait on a given compute node resource provider. diff --git a/nova/virt/fake.py b/nova/virt/fake.py index d4bbc9da5d55..581e97c37116 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -666,6 +666,10 @@ class FakeVirtAPI(virtapi.VirtAPI): # fall through yield + def exit_wait_early(self, events): + # We never wait, so there is nothing to exit early + pass + def update_compute_provider_status(self, context, rp_uuid, enabled): pass diff --git a/nova/virt/virtapi.py b/nova/virt/virtapi.py index a8e7d3355848..dad1197d8ccb 100644 --- a/nova/virt/virtapi.py +++ b/nova/virt/virtapi.py @@ -21,6 +21,9 @@ class VirtAPI(object): error_callback=None): raise NotImplementedError() + def exit_wait_early(self, events): + raise NotImplementedError() + def update_compute_provider_status(self, context, rp_uuid, enabled): """Used to add/remove the COMPUTE_STATUS_DISABLED trait on the provider