Add recreate test for unshelve offloaded instance spawn fail

This adds a functional test to recreate bug 1713796, where allocations are not cleaned up from the compute node when unshelving an offloaded server fails while spawning the guest.

Change-Id: I3237ec954f6504513c8ef5a6ba43f57d0d2622a3
Related-Bug: #1713796
2017-09-21 19:11:31 -04:00 · 2017-09-21 19:11:31 -04:00 · 56232e5de9
parent 52268672da
commit 56232e5de9
2 changed files with 118 additions and 0 deletions
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@ -23,6 +23,7 @@ from oslo_serialization import base64
 from oslo_utils import timeutils

 from nova.compute import api as compute_api
+from nova.compute import instance_actions
 from nova.compute import rpcapi
 from nova import context
 from nova import exception
@ -2421,3 +2422,104 @@ class ServerBuildAbortTests(ProviderUsageBaseTestCase):
        # Expects no allocation records on the failed host.
        self.assertFlavorMatchesAllocation(
           {'vcpus': 0, 'ram': 0, 'disk': 0}, failed_usages)
+
+
+class ServerUnshelveSpawnFailTests(ProviderUsageBaseTestCase):
+    """Tests server unshelve scenarios which trigger a
+    VirtualInterfaceCreateException during driver.spawn() and validates that
+    allocations in Placement are properly cleaned up.
+    """
+
+    compute_driver = 'fake.FakeUnshelveSpawnFailDriver'
+
+    def setUp(self):
+        super(ServerUnshelveSpawnFailTests, self).setUp()
+        # We only need one compute service/host/node for these tests.
+        fake.set_nodes(['host1'])
+        self.flags(host='host1')
+        self.compute1 = self.start_service('compute', host='host1')
+
+        flavors = self.api.get_flavors()
+        self.flavor1 = flavors[0]
+
+    # TODO(mriedem): move this into InstanceHelperMixin
+    def _wait_for_unshelve_fail_completion(self, server, expected_action):
+        """Polls instance action events for the given instance and action
+        until it finds the compute_unshelve_instance action event with an error
+        result. Returns as soon as it is found; fails the test if it has not
+        shown up after 10 polls spaced 0.5 seconds apart.
+
+        :param server: server dict; only its 'id' is read
+        :param expected_action: name of the instance action to look for
+        """
+        for _ in range(10):
+            actions = self.api.get_instance_actions(server['id'])
+            # Look for the expected (unshelve) action.
+            for action in actions:
+                if action['action'] != expected_action:
+                    continue
+                events = (
+                    self.api.api_get(
+                        '/servers/%s/os-instance-actions/%s' %
+                        (server['id'], action['request_id'])
+                    ).body['instanceAction']['events'])
+                # Look for the compute_unshelve_instance being in error
+                # state.
+                for event in events:
+                    if (event['event'] == 'compute_unshelve_instance' and
+                            event['result'] is not None and
+                            event['result'].lower() == 'error'):
+                        # Found it; stop polling right away rather than
+                        # sleeping through the remaining attempts.
+                        return
+            # We didn't find the completion event yet, so wait a bit.
+            time.sleep(0.5)
+
+        self.fail('Timed out waiting for compute_unshelve_instance '
+                  'failure event. Current instance actions: %s' % actions)
+
+    def test_driver_spawn_fail_when_unshelving_instance(self):
+        """Tests that allocations, created by the scheduler, are cleaned
+        from the target node when the unshelve driver.spawn fails on that node.
+        """
+        hostname = self.compute1.manager.host
+        rp_uuid = self._get_provider_uuid_by_host(hostname)
+        usages = self._get_provider_usages(rp_uuid)
+        # We start with no usages on the host.
+        self.assertFlavorMatchesAllocation(
+           {'vcpus': 0, 'ram': 0, 'disk': 0}, usages)
+
+        server_req = self._build_minimal_create_server_request(
+            self.api, 'unshelve-spawn-fail', flavor_id=self.flavor1['id'],
+            image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
+            networks='none')
+
+        server = self.api.post_server({'server': server_req})
+        self._wait_for_state_change(self.api, server, 'ACTIVE')
+
+        # assert allocations exist for the host
+        usages = self._get_provider_usages(rp_uuid)
+        self.assertFlavorMatchesAllocation(self.flavor1, usages)
+
+        # shelve offload the server
+        self.flags(shelved_offload_time=0)
+        self.api.post_server_action(server['id'], {'shelve': None})
+        self._wait_for_state_change(self.api, server, 'SHELVED_OFFLOADED')
+
+        # assert allocations were removed from the host
+        usages = self._get_provider_usages(rp_uuid)
+        self.assertFlavorMatchesAllocation(
+           {'vcpus': 0, 'ram': 0, 'disk': 0}, usages)
+
+        # unshelve the server, which should fail
+        self.api.post_server_action(server['id'], {'unshelve': None})
+        self._wait_for_unshelve_fail_completion(
+            server, instance_actions.UNSHELVE)
+
+        # check the allocations on the host after the failed unshelve
+        usages = self._get_provider_usages(rp_uuid)
+        # FIXME: this is bug 1713796 where the allocations aren't cleaned up;
+        # remove once fixed
+        self.assertFlavorMatchesAllocation(self.flavor1, usages)
+        # self.assertFlavorMatchesAllocation(
+        #    {'vcpus': 0, 'ram': 0, 'disk': 0}, usages)
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@ -33,6 +33,7 @@ from oslo_utils import versionutils

 from nova.compute import power_state
 from nova.compute import task_states
+from nova.compute import vm_states
 import nova.conf
 from nova.console import type as ctype
 from nova import exception
@ -635,3 +636,18 @@ class FakeBuildAbortDriver(FakeDriver):
              admin_password, network_info=None, block_device_info=None):
        raise exception.BuildAbortException(
            instance_uuid=instance.uuid, reason='FakeBuildAbortDriver')
+
+
+class FakeUnshelveSpawnFailDriver(FakeDriver):
+    """FakeDriver whose spawn() raises VirtualInterfaceCreateException for
+    a SHELVED_OFFLOADED instance (unshelve) and otherwise spawns normally.
+    """
+    def spawn(self, context, instance, image_meta, injected_files,
+              admin_password, network_info=None, block_device_info=None):
+        if instance.vm_state != vm_states.SHELVED_OFFLOADED:
+            super(FakeUnshelveSpawnFailDriver, self).spawn(
+                context, instance, image_meta, injected_files,
+                admin_password, network_info, block_device_info)
+            return
+        raise exception.VirtualInterfaceCreateException(
+            'FakeUnshelveSpawnFailDriver')