From 0f2d87416eff1e96c0fbf0f4b08bf6b6b22246d5 Mon Sep 17 00:00:00 2001
From: Guang Yee
Date: Thu, 18 May 2017 16:38:16 -0700
Subject: [PATCH] make sure to rebuild claim on recreate

On recreate, where the instance is being evacuated to a different node,
we should be rebuilding the claim so the migration context is available
when rebuilding the instance.

Conflicts:
        nova/compute/manager.py
        nova/tests/unit/compute/test_compute_mgr.py

NOTE(mriedem): There are a few issues here:

1. I5aaa869f2e6155964827e659d18e2bcaad9d866b changed the LOG.info
   method to not pass a context in Ocata.
2. I57233259065d887b38a79850a05177fcbbdfb8c3 changed some tests in
   test_compute_manager in Ocata, but is irrelevant here.
3. The bigger change isn't a merge conflict, but in Ocata the compute
   manager code was all refactored so that the _get_resource_tracker
   method no longer needed a nodename passed to it. In Newton, however,
   if we're force evacuating (scenario 3) then we don't have a
   scheduled_node passed to the rebuild_instance method, and in this
   case we need to look up the nodename for the host we're currently
   on. To resolve this, some existing code that handles this case is
   moved up to where it is needed to get the resource tracker so we can
   get the rebuild_claim method. We let any ComputeHostNotFound
   exception raise up in this case rather than log it, because without
   the compute node we can't make the rebuild claim and we need to
   fail. Tests are adjusted accordingly.

Change-Id: I53bdcf8edf640e97b4632ef7a093f14a6e3845e4
Closes-Bug: 1658070
(cherry picked from commit a2b0824aca5cb4a2ae579f625327c51ed0414d35)
(cherry picked from commit ea90c60b07534a46541c55432389f2d50b5b7d0a)
---
 nova/compute/manager.py                     | 36 +++++++++++++--------
 nova/tests/unit/compute/test_compute.py     | 17 +++++-----
 nova/tests/unit/compute/test_compute_mgr.py |  4 ++-
 3 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 7b403e9cc257..b7edfa985906 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -2660,7 +2660,28 @@ class ComputeManager(manager.Manager):
 
         LOG.info(_LI("Rebuilding instance"), context=context,
                  instance=instance)
-        if scheduled_node is not None:
+
+        # NOTE(gyee): there are three possible scenarios.
+        #
+        # 1. instance is being rebuilt on the same node. In this case,
+        #    recreate should be False and scheduled_node should be None.
+        # 2. instance is being rebuilt on a node chosen by the
+        #    scheduler (i.e. evacuate). In this case, scheduled_node should
+        #    be specified and recreate should be True.
+        # 3. instance is being rebuilt on a node chosen by the user (i.e.
+        #    force evacuate). In this case, scheduled_node is not specified
+        #    and recreate is set to True.
+        #
+        # For scenarios #2 and #3, we must do a rebuild claim, as the
+        # server is being evacuated to a different node.
+        if recreate or scheduled_node is not None:
+            if scheduled_node is None:
+                # NOTE(mriedem): On a recreate (evacuate), we need to update
+                # the instance's host and node properties to reflect its
+                # destination node for the recreate, and to make a rebuild
+                # claim.
+                compute_node = self._get_compute_info(context, self.host)
+                scheduled_node = compute_node.hypervisor_hostname
             rt = self._get_resource_tracker(scheduled_node)
             rebuild_claim = rt.rebuild_claim
         else:
@@ -2670,19 +2691,8 @@ class ComputeManager(manager.Manager):
         if image_ref:
             image_meta = self.image_api.get(context, image_ref)
 
-        # NOTE(mriedem): On a recreate (evacuate), we need to update
-        # the instance's host and node properties to reflect it's
-        # destination node for the recreate.
         if not scheduled_node:
-            if recreate:
-                try:
-                    compute_node = self._get_compute_info(context, self.host)
-                    scheduled_node = compute_node.hypervisor_hostname
-                except exception.ComputeHostNotFound:
-                    LOG.exception(_LE('Failed to get compute_info for %s'),
-                                  self.host)
-            else:
-                scheduled_node = instance.node
+            scheduled_node = instance.node
 
         with self._error_out_instance_on_exception(context, instance):
             try:
diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py
index 02d0598a9bda..bc1bb0523fbc 100644
--- a/nova/tests/unit/compute/test_compute.py
+++ b/nova/tests/unit/compute/test_compute.py
@@ -11345,14 +11345,7 @@ class EvacuateHostTestCase(BaseTestCase):
             mock.patch.object(self.compute, '_get_compute_info',
                               side_effect=fake_get_compute_info)
         ) as (mock_inst, mock_get):
-            self._rebuild()
-
-            # Should be on destination host
-            instance = db.instance_get(self.context, self.inst.id)
-            self.assertEqual(instance['host'], self.compute.host)
-            self.assertIsNone(instance['node'])
-            self.assertTrue(mock_inst.called)
-            self.assertTrue(mock_get.called)
+            self.assertRaises(exception.ComputeHostNotFound, self._rebuild)
 
     def test_rebuild_on_host_node_passed(self):
         patch_get_info = mock.patch.object(self.compute, '_get_compute_info')
@@ -11540,15 +11533,21 @@ class EvacuateHostTestCase(BaseTestCase):
                 network_info=mock.ANY,
                 block_device_info=mock.ANY)
 
-    def test_rebuild_migration_passed_in(self):
+    @mock.patch('nova.compute.manager.ComputeManager._get_compute_info')
+    @mock.patch('nova.compute.manager.ComputeManager._get_resource_tracker')
+    def test_rebuild_migration_passed_in(self, get_rt, get_compute):
         migration = mock.Mock(spec=objects.Migration)
 
         patch_spawn = mock.patch.object(self.compute.driver, 'spawn')
         patch_on_disk = mock.patch.object(
             self.compute.driver, 'instance_on_disk', return_value=True)
+        get_compute.return_value = objects.ComputeNode(
+            hypervisor_hostname=NODENAME)
 
         with patch_spawn, patch_on_disk:
             self._rebuild(migration=migration)
 
+        get_rt.assert_called_once_with(NODENAME)
+        self.assertTrue(get_rt.return_value.rebuild_claim.called)
         self.assertEqual('done', migration.status)
         migration.save.assert_called_once_with()
 
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 694f014413fb..7e00e5fcad35 100755
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -2951,17 +2951,19 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
                                   node=dead_node)
         instance.migration_context = None
         with test.nested(
+            mock.patch.object(self.compute, '_get_resource_tracker'),
             mock.patch.object(self.compute, '_get_compute_info'),
             mock.patch.object(self.compute, '_do_rebuild_instance_with_claim'),
             mock.patch.object(objects.Instance, 'save'),
             mock.patch.object(self.compute, '_set_migration_status'),
-        ) as (mock_get, mock_rebuild, mock_save, mock_set):
+        ) as (mock_rt, mock_get, mock_rebuild, mock_save, mock_set):
             mock_get.return_value.hypervisor_hostname = 'new-node'
             self.compute.rebuild_instance(self.context, instance, None, None,
                                           None, None, None, None, True)
             mock_get.assert_called_once_with(mock.ANY, self.compute.host)
             self.assertEqual('new-node', instance.node)
             mock_set.assert_called_once_with(None, 'done')
+            mock_rt.assert_called_once_with('new-node')
 
     def test_rebuild_default_impl(self):
         def _detach(context, bdms):
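For reviewers, the claim-selection logic this patch introduces in
rebuild_instance can be summarized in a minimal standalone sketch. It is
illustrative only: select_rebuild_claim and the Fake* classes below are
hypothetical stand-ins rather than Nova code, NopClaim mirrors the role of
nova.compute.claims.NopClaim, and only the branching reflects the patched
method.

    class NopClaim(object):
        """No-op claim used when no resource claim is needed (scenario 1)."""
        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            return False


    def select_rebuild_claim(manager, context, instance, recreate,
                             scheduled_node):
        """Return (claim_callable, nodename) for the three rebuild scenarios.

        1. Rebuild on the same node: recreate=False, scheduled_node=None.
        2. Evacuate to a scheduler-chosen node: recreate=True, node given.
        3. Forced evacuate: recreate=True, scheduled_node=None.
        """
        if recreate or scheduled_node is not None:
            if scheduled_node is None:
                # Scenario 3: no node from the scheduler, so resolve the
                # node for the host we are currently on. A
                # ComputeHostNotFound here must propagate: without the
                # compute node no rebuild claim can be made and the
                # rebuild has to fail.
                compute_node = manager._get_compute_info(context,
                                                         manager.host)
                scheduled_node = compute_node.hypervisor_hostname
            # Scenarios 2 and 3: a real rebuild claim, so the migration
            # context is available when rebuilding on the destination node.
            rt = manager._get_resource_tracker(scheduled_node)
            return rt.rebuild_claim, scheduled_node
        # Scenario 1: in-place rebuild, no claim required.
        return NopClaim, instance.node


    if __name__ == '__main__':
        class FakeNode(object):
            hypervisor_hostname = 'node1'

        class FakeResourceTracker(object):
            def rebuild_claim(self, *args, **kwargs):
                return NopClaim()

        class FakeManager(object):
            host = 'host1'

            def _get_compute_info(self, context, host):
                return FakeNode()

            def _get_resource_tracker(self, nodename):
                return FakeResourceTracker()

        class FakeInstance(object):
            node = 'node1'

        # Scenario 3 (forced evacuate): the node is resolved from the
        # local host and the claim comes from its resource tracker.
        claim, node = select_rebuild_claim(FakeManager(), None,
                                           FakeInstance(), recreate=True,
                                           scheduled_node=None)
        print(claim, node)

Running the module exercises scenario 3, where the nodename is looked up from
the local host before the rebuild claim is made, matching the code path the
new tests above assert on.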