From 223a24674ee4a68c9f63516384c5485d9958bcb3 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Sat, 6 Oct 2018 05:50:04 -0700 Subject: [PATCH] Avoid race with nova on power sync and rescue Nova power sync can see an instance off during the rescue test, and as a result decide that the power must remain off. As such, we kind of need to fake it for a moment until we actually turn power back on. This is done in ironic to help make a minimal behavior fix as ironic will return the power state to ``POWER_ON``. Change-Id: Ib8210062560d6f5c7061caf1fe19f11290578e51 Story: 2003990 Task: 26934 --- ironic/drivers/modules/agent.py | 19 +++++++++++ .../tests/unit/drivers/modules/test_agent.py | 34 +++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 9a4421c05c..80be59d0f7 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -814,6 +814,15 @@ class AgentRescue(base.RescueInterface): :returns: Returns states.RESCUEWAIT """ manager_utils.node_power_action(task, states.POWER_OFF) + # NOTE(TheJulia): Revealing that the power is off at any time can + # cause external power sync to decide that the node must be off. + # This may result in a post-rescued instance being turned off + # unexpectedly after rescue has started. + # TODO(TheJulia): Once we have power/state callbacks to nova, + # the reset of the power_state can be removed. + task.node.power_state = states.POWER_ON + task.node.save() + task.driver.boot.clean_up_instance(task) task.driver.network.unconfigure_tenant_networks(task) task.driver.network.add_rescuing_network(task) @@ -840,6 +849,16 @@ class AgentRescue(base.RescueInterface): :returns: Returns states.ACTIVE """ manager_utils.node_power_action(task, states.POWER_OFF) + + # NOTE(TheJulia): Revealing that the power is off at any time can + # cause external power sync to decide that the node must be off. + # This may result in a post-rescued insance being turned off + # unexpectedly after unrescue. + # TODO(TheJulia): Once we have power/state callbacks to nova, + # the reset of the power_state can be removed. + task.node.power_state = states.POWER_ON + task.node.save() + self.clean_up(task) task.driver.network.configure_tenant_networks(task) task.driver.boot.prepare_instance(task) diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py index 005763eba5..fba4807c4c 100644 --- a/ironic/tests/unit/drivers/modules/test_agent.py +++ b/ironic/tests/unit/drivers/modules/test_agent.py @@ -1660,6 +1660,40 @@ class AgentRescueTestCase(db_base.DbTestCase): mock_prepare_instance.assert_called_once_with(mock.ANY, task) self.assertEqual(states.ACTIVE, result) + @mock.patch.object(fake.FakeBoot, 'clean_up_instance', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + def test_agent_rescue_power_on(self, mock_node_power_action, + mock_clean_up_instance): + self.node.power_state = states.POWER_ON + mock_clean_up_instance.side_effect = exception.IronicException() + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.IronicException, + task.driver.rescue.rescue, task) + mock_node_power_action.assert_called_once_with(task, + states.POWER_OFF) + task.node.refresh() + # Ensure that our stored power state while the lock is still + # being held, shows as POWER_ON to an external reader, such + # as the API. + self.assertEqual(states.POWER_ON, task.node.power_state) + + @mock.patch.object(fake.FakeBoot, 'clean_up_ramdisk', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + def test_agent_unrescue_power_on(self, mock_node_power_action, + mock_clean_ramdisk): + self.node.power_state = states.POWER_ON + mock_clean_ramdisk.side_effect = exception.IronicException() + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.IronicException, + task.driver.rescue.unrescue, task) + mock_node_power_action.assert_called_once_with(task, + states.POWER_OFF) + task.node.refresh() + # Ensure that our stored power state while the lock is still + # being held, shows as POWER_ON to an external reader, such + # as the API. + self.assertEqual(states.POWER_ON, task.node.power_state) + @mock.patch.object(flat_network.FlatNetwork, 'validate_rescue', autospec=True) @mock.patch.object(fake.FakeBoot, 'validate', autospec=True)