Avoid race with nova on power sync and rescue

Nova power sync can see an instance off during the rescue test,
and as a result decide that the power must remain off. As such,
we kind of need to fake it for a moment until we actually turn
power back on.

This is done in ironic to help make a minimal behavior fix as
ironic will return the power state to ``POWER_ON``.

Change-Id: Ib8210062560d6f5c7061caf1fe19f11290578e51
Story: 2003990
Task: 26934
This commit is contained in:
Julia Kreger 2018-10-06 05:50:04 -07:00
parent bbcafc04b9
commit 223a24674e
2 changed files with 53 additions and 0 deletions

View File

@ -814,6 +814,15 @@ class AgentRescue(base.RescueInterface):
:returns: Returns states.RESCUEWAIT
"""
manager_utils.node_power_action(task, states.POWER_OFF)
# NOTE(TheJulia): Revealing that the power is off at any time can
# cause external power sync to decide that the node must be off.
# This may result in a post-rescued instance being turned off
# unexpectedly after rescue has started.
# TODO(TheJulia): Once we have power/state callbacks to nova,
# the reset of the power_state can be removed.
task.node.power_state = states.POWER_ON
task.node.save()
task.driver.boot.clean_up_instance(task)
task.driver.network.unconfigure_tenant_networks(task)
task.driver.network.add_rescuing_network(task)
@ -840,6 +849,16 @@ class AgentRescue(base.RescueInterface):
:returns: Returns states.ACTIVE
"""
manager_utils.node_power_action(task, states.POWER_OFF)
# NOTE(TheJulia): Revealing that the power is off at any time can
# cause external power sync to decide that the node must be off.
# This may result in a post-rescued insance being turned off
# unexpectedly after unrescue.
# TODO(TheJulia): Once we have power/state callbacks to nova,
# the reset of the power_state can be removed.
task.node.power_state = states.POWER_ON
task.node.save()
self.clean_up(task)
task.driver.network.configure_tenant_networks(task)
task.driver.boot.prepare_instance(task)

View File

@ -1660,6 +1660,40 @@ class AgentRescueTestCase(db_base.DbTestCase):
mock_prepare_instance.assert_called_once_with(mock.ANY, task)
self.assertEqual(states.ACTIVE, result)
@mock.patch.object(fake.FakeBoot, 'clean_up_instance', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
def test_agent_rescue_power_on(self, mock_node_power_action,
mock_clean_up_instance):
self.node.power_state = states.POWER_ON
mock_clean_up_instance.side_effect = exception.IronicException()
with task_manager.acquire(self.context, self.node.uuid) as task:
self.assertRaises(exception.IronicException,
task.driver.rescue.rescue, task)
mock_node_power_action.assert_called_once_with(task,
states.POWER_OFF)
task.node.refresh()
# Ensure that our stored power state while the lock is still
# being held, shows as POWER_ON to an external reader, such
# as the API.
self.assertEqual(states.POWER_ON, task.node.power_state)
@mock.patch.object(fake.FakeBoot, 'clean_up_ramdisk', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
def test_agent_unrescue_power_on(self, mock_node_power_action,
mock_clean_ramdisk):
self.node.power_state = states.POWER_ON
mock_clean_ramdisk.side_effect = exception.IronicException()
with task_manager.acquire(self.context, self.node.uuid) as task:
self.assertRaises(exception.IronicException,
task.driver.rescue.unrescue, task)
mock_node_power_action.assert_called_once_with(task,
states.POWER_OFF)
task.node.refresh()
# Ensure that our stored power state while the lock is still
# being held, shows as POWER_ON to an external reader, such
# as the API.
self.assertEqual(states.POWER_ON, task.node.power_state)
@mock.patch.object(flat_network.FlatNetwork, 'validate_rescue',
autospec=True)
@mock.patch.object(fake.FakeBoot, 'validate', autospec=True)