From 6369f39244558b147f7b0796269d9a86ce9b12d8 Mon Sep 17 00:00:00 2001
From: Matt Riedemann
Date: Fri, 7 Dec 2018 17:27:16 -0500
Subject: [PATCH] Remove allocations before setting vm_status to SHELVED_OFFLOADED

Tempest is intermittently failing a test which does the following:

1. Create a server.
2. Shelve offload it.
3. Unshelve it.

Tempest waits for the server status to be SHELVED_OFFLOADED before
unshelving the server, which goes through the scheduler to pick a
compute node and claim resources on it.

When shelve offloading a server, the resource allocations for the
instance and the compute node it was on are cleared, which also
deletes the internal consumer record in the placement service.

The race is that the allocations are removed during shelve offload
*after* the server status changes to SHELVED_OFFLOADED. This leaves a
window where unshelve is going through the scheduler and gets the
existing allocations for the instance, which are non-empty and have a
consumer generation. The claim_resources method in the scheduler then
uses that consumer generation when PUTing the allocations. That PUT
fails because in between the GET and PUT of the allocations, placement
has deleted the internal consumer record. When PUTing the new
allocations with a non-null consumer generation, placement returns a
409 conflict error because for a new consumer it expects the
"consumer_generation" parameter to be None.

This change handles the race by making sure the allocations are
deleted (along with the related consumer record in placement) *before*
instance.vm_state is changed to SHELVED_OFFLOADED.

Change-Id: I2a6ccaff904c1f0759d55feeeef0ec1da32c65df
Closes-Bug: #1798688
---
 nova/compute/manager.py                | 10 ++++++---
 nova/tests/unit/compute/test_shelve.py | 28 +++++++++++++++++---------
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 10a38511dad2..0b5d201ba6d5 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -4931,6 +4931,13 @@ class ComputeManager(manager.Manager):
         # terminate all the connections with the volume server and the host
         self._terminate_volume_connections(context, instance, bdms)
 
+        # Free up the resource allocations in the placement service.
+        # This should happen *before* the vm_state is changed to
+        # SHELVED_OFFLOADED in case client-side code is polling the API to
+        # schedule more instances (or unshelve) once this server is offloaded.
+        rt = self._get_resource_tracker()
+        rt.delete_allocation_for_shelve_offloaded_instance(context, instance)
+
         instance.power_state = current_power_state
         # NOTE(mriedem): The vm_state has to be set before updating the
         # resource tracker, see vm_states.ALLOW_RESOURCE_REMOVAL. The host/node
@@ -4944,9 +4951,6 @@ class ComputeManager(manager.Manager):
         # NOTE(ndipanov): Free resources from the resource tracker
         self._update_resource_tracker(context, instance)
 
-        rt = self._get_resource_tracker()
-        rt.delete_allocation_for_shelve_offloaded_instance(context, instance)
-
         # NOTE(sfinucan): RPC calls should no longer be attempted against this
         # instance, so ensure any calls result in errors
         self._nil_out_instance_obj_host_and_node(instance)
diff --git a/nova/tests/unit/compute/test_shelve.py b/nova/tests/unit/compute/test_shelve.py
index 78a2a252637f..2ac38c2c3fda 100644
--- a/nova/tests/unit/compute/test_shelve.py
+++ b/nova/tests/unit/compute/test_shelve.py
@@ -225,16 +225,24 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
         fake_bdms = objects.BlockDeviceMappingList()
         mock_get_bdms.return_value = fake_bdms
 
-        with mock.patch.object(instance, 'save'):
-            self.compute.shelve_offload_instance(self.context, instance,
-                                                 clean_shutdown=clean_shutdown)
-            mock_notify.assert_has_calls([
-                mock.call(self.context, instance, 'fake-mini',
-                          action='shelve_offload', phase='start',
-                          bdms=fake_bdms),
-                mock.call(self.context, instance, 'fake-mini',
-                          action='shelve_offload', phase='end',
-                          bdms=fake_bdms)])
+        def stub_instance_save(inst, *args, **kwargs):
+            # If the vm_state is changed to SHELVED_OFFLOADED make sure we
+            # have already freed up allocations in placement.
+            if inst.vm_state == vm_states.SHELVED_OFFLOADED:
+                self.assertTrue(mock_delete_alloc.called,
+                                'Allocations must be deleted before the '
+                                'vm_status can change to shelved_offloaded.')
+
+        self.stub_out('nova.objects.Instance.save', stub_instance_save)
+        self.compute.shelve_offload_instance(self.context, instance,
+                                             clean_shutdown=clean_shutdown)
+        mock_notify.assert_has_calls([
+            mock.call(self.context, instance, 'fake-mini',
+                      action='shelve_offload', phase='start',
+                      bdms=fake_bdms),
+            mock.call(self.context, instance, 'fake-mini',
+                      action='shelve_offload', phase='end',
+                      bdms=fake_bdms)])
 
         self.assertEqual(vm_states.SHELVED_OFFLOADED, instance.vm_state)
         self.assertIsNone(instance.task_state)
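
For reviewers who want to see the failure mode outside of Tempest, the
following is a minimal sketch of the racing GET/PUT sequence against
the placement REST API. It is illustrative only and not part of this
patch: the endpoint, token and UUIDs are hypothetical stand-ins, and it
assumes placement microversion 1.28, the first microversion with
consumer generations.

    import uuid

    import requests

    # Hypothetical placement endpoint and token; a real deployment
    # would use a keystoneauth session instead.
    PLACEMENT = 'http://placement.example.com/placement'
    HEADERS = {'X-Auth-Token': 'hypothetical-token',
               'OpenStack-API-Version': 'placement 1.28'}
    instance_uuid = str(uuid.uuid4())  # the allocation consumer
    rp_uuid = str(uuid.uuid4())        # compute node resource provider
    project_id = str(uuid.uuid4())
    user_id = str(uuid.uuid4())

    # 1. During unshelve, the scheduler GETs the existing allocations.
    #    Before this patch, shelve offload might not have deleted them
    #    yet, so a real consumer generation (e.g. 1) comes back.
    resp = requests.get('%s/allocations/%s' % (PLACEMENT, instance_uuid),
                        headers=HEADERS)
    generation = resp.json()['consumer_generation']

    # 2. Meanwhile the compute host finishes shelve offload and deletes
    #    the allocations, which also removes the consumer record.

    # 3. claim_resources then PUTs the new allocations with the stale
    #    generation. Placement now sees a brand-new consumer, for which
    #    it requires "consumer_generation": null, and returns the 409
    #    conflict that Tempest intermittently hits.
    payload = {'allocations': {rp_uuid: {'resources': {'VCPU': 1}}},
               'consumer_generation': generation,  # stale; must be None
               'project_id': project_id,
               'user_id': user_id}
    resp = requests.put('%s/allocations/%s' % (PLACEMENT, instance_uuid),
                        json=payload, headers=HEADERS)
    assert resp.status_code == 409

With this patch the allocations and consumer record are gone before the
API can ever report SHELVED_OFFLOADED, so the GET in step 1 should
return an empty allocation set with a null consumer generation and the
subsequent PUT creates the consumer cleanly.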