Set server status to ERROR if rebuild failed

Currently there is no indication that the rebuild was refused,
and worse, we may have a wrong imageref for the instance.

This patch set the instance to ERROR status if rebuild failed in the
scheduling stage. The user can rebuild the instance with valid image
to get it out of ERROR state and reset with right instance metadata and
properties.

Closes-Bug: 1744325

 Conflicts:
    nova/conductor/manager.py
    nova/tests/functional/regressions/test_bug_1713783.py
    nova/tests/functional/test_servers.py
    nova/tests/unit/conductor/test_conductor.py

NOTE(melwitt): The conflicts were because of log translation, the fact
that the regression test for bug 1713783 doesn't exist in Ocata, there
are addtional rebuild functional tests in Pike that don't exist in
Ocata, and the select_destinations method has additional parameters in
Pike that don't exist in Ocata.

Change-Id: Ibb7bee15a3d4ee6f0ef53ba12e8b41f65a1fe999
(cherry picked from commit d03a890a34)
(cherry picked from commit 22a39b8f9b)
This commit is contained in:
int32bit 2018-01-22 17:05:53 +08:00 committed by melanie witt
parent b47656272c
commit 83fd8ac0bf
5 changed files with 33 additions and 9 deletions

View File

@ -16,6 +16,7 @@
import contextlib
import copy
import sys
from oslo_config import cfg
from oslo_log import log as logging
@ -735,19 +736,23 @@ class ComputeTaskManager(base.Base):
with excutils.save_and_reraise_exception():
self._set_vm_state_and_notify(context, instance.uuid,
'rebuild_server',
{'vm_state': instance.vm_state,
{'vm_state': vm_states.ERROR,
'task_state': None}, ex, request_spec)
LOG.warning(_LW("No valid host found for rebuild"),
instance=instance)
compute_utils.add_instance_fault_from_exc(context,
instance, ex, sys.exc_info())
except exception.UnsupportedPolicyException as ex:
with excutils.save_and_reraise_exception():
self._set_vm_state_and_notify(context, instance.uuid,
'rebuild_server',
{'vm_state': instance.vm_state,
{'vm_state': vm_states.ERROR,
'task_state': None}, ex, request_spec)
LOG.warning(_LW("Server with unsupported policy "
"cannot be rebuilt"),
instance=instance)
compute_utils.add_instance_fault_from_exc(context,
instance, ex, sys.exc_info())
try:
migration = objects.Migration.get_by_instance_and_status(

View File

@ -455,7 +455,7 @@ class ServerGroupTestV21(ServerGroupTestBase):
self.admin_api.post_server_action(servers[1]['id'], post)
server_after_failed_evac = self._wait_for_state_change(
self.admin_api, servers[1], 'ACTIVE')
self.admin_api, servers[1], 'ERROR')
# assert that after a failed evac the server active on the same host
# as before
@ -479,7 +479,7 @@ class ServerGroupTestV21(ServerGroupTestBase):
self.admin_api.post_server_action(servers[1]['id'], post)
server_after_failed_evac = self._wait_for_state_change(
self.admin_api, servers[1], 'ACTIVE')
self.admin_api, servers[1], 'ERROR')
# assert that after a failed evac the server active on the same host
# as before
@ -653,7 +653,7 @@ class ServerGroupTestV215(ServerGroupTestV21):
self.admin_api.post_server_action(servers[1]['id'], post)
server_after_failed_evac = self._wait_for_state_change(
self.admin_api, servers[1], 'ACTIVE')
self.admin_api, servers[1], 'ERROR')
# assert that after a failed evac the server active on the same host
# as before
@ -677,7 +677,7 @@ class ServerGroupTestV215(ServerGroupTestV21):
self.admin_api.post_server_action(servers[1]['id'], post)
server_after_failed_evac = self._wait_for_state_change(
self.admin_api, servers[1], 'ACTIVE')
self.admin_api, servers[1], 'ERROR')
# assert that after a failed evac the server active on the same host
# as before

View File

@ -1004,3 +1004,7 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase,
# even though the rebuild should not work.
server = self.api.get_server(server['id'])
self.assertEqual(rebuild_image_ref, server['image']['id'])
# The server should be in ERROR state
self.assertEqual('ERROR', server['status'])
self.assertIn('No valid host', server['fault']['message'])

View File

@ -1268,8 +1268,10 @@ class _BaseTaskTestCase(object):
'select_destinations',
side_effect=exc.NoValidHost(reason='')),
mock.patch('nova.scheduler.utils.build_request_spec',
return_value=request_spec)
) as (rebuild_mock, sig_mock, fp_mock, select_dest_mock, bs_mock):
return_value=request_spec),
mock.patch.object(scheduler_utils, 'set_vm_state_and_notify')
) as (rebuild_mock, sig_mock, fp_mock,
select_dest_mock, bs_mock, set_vm_state_and_notify_mock):
self.assertRaises(exc.NoValidHost,
self.conductor_manager.rebuild_instance,
context=self.context, instance=inst_obj,
@ -1277,7 +1279,11 @@ class _BaseTaskTestCase(object):
fp_mock.assert_called_once_with(self.context, request_spec,
filter_properties)
select_dest_mock.assert_called_once_with(self.context, fake_spec)
self.assertEqual(
set_vm_state_and_notify_mock.call_args[0][4]['vm_state'],
vm_states.ERROR)
self.assertFalse(rebuild_mock.called)
self.assertIn('No valid host', inst_obj.fault.message)
@mock.patch.object(conductor_manager.compute_rpcapi.ComputeAPI,
'rebuild_instance')
@ -1309,12 +1315,14 @@ class _BaseTaskTestCase(object):
self.context,
inst_obj,
**rebuild_args)
updates = {'vm_state': vm_states.ACTIVE, 'task_state': None}
updates = {'vm_state': vm_states.ERROR, 'task_state': None}
state_mock.assert_called_once_with(self.context, inst_obj.uuid,
'rebuild_server', updates,
exception, request_spec)
self.assertFalse(select_dest_mock.called)
self.assertFalse(rebuild_mock.called)
self.assertIn('ServerGroup policy is not supported',
inst_obj.fault.message)
def test_rebuild_instance_evacuate_migration_record(self):
inst_obj = self._create_fake_instance_obj()

View File

@ -0,0 +1,7 @@
---
fixes:
- |
If scheduling fails during rebuild the server instance will go to ERROR
state and a fault will be recorded. `Bug 1744325`_
.. _Bug 1744325: https://bugs.launchpad.net/nova/+bug/1744325