From 5694b98fc81032933b112528bf08cb6688fa7c1a Mon Sep 17 00:00:00 2001
From: Dmitry Tantsur
Date: Tue, 20 Feb 2018 19:47:52 +0100
Subject: [PATCH] Rework logic handling reserved orphaned nodes in the
 conductor

If a conductor dies while holding a reservation, the node can get stuck
in its current state. Currently the conductor that takes over the node
only cleans it up if it's in the DEPLOYING state.

This change applies the same logic to all nodes:

1. The reservation is cleared by the conductor that took over the node,
   regardless of the provision state.
2. CLEANING is also aborted; such nodes are moved to CLEAN FAIL with
   maintenance on.
3. The target power state is cleared as well.

The reservation is cleared even for nodes in maintenance mode;
otherwise it's impossible to move them out of maintenance.

Change-Id: I379c1335692046ca9423fda5ea68d2f10c065cb5
Closes-Bug: #1588901
---
 ironic/conductor/base_manager.py              |   9 +-
 ironic/conductor/manager.py                   |  71 +++++++++---
 ironic/conductor/utils.py                     |  21 ++++
 ironic/tests/unit/conductor/test_manager.py   | 104 ++++++++++++++----
 ironic/tests/unit/conductor/test_utils.py     |  19 ++++
 .../notes/orphan-nodes-389cb6d90c2917ec.yaml  |   9 ++
 6 files changed, 193 insertions(+), 40 deletions(-)
 create mode 100644 releasenotes/notes/orphan-nodes-389cb6d90c2917ec.yaml

diff --git a/ironic/conductor/base_manager.py b/ironic/conductor/base_manager.py
index a75a5161f0..2aaaab9a5e 100644
--- a/ironic/conductor/base_manager.py
+++ b/ironic/conductor/base_manager.py
@@ -22,6 +22,7 @@ from futurist import rejection
 from oslo_db import exception as db_exception
 from oslo_log import log
 from oslo_utils import excutils
+import six
 
 from ironic.common import context as ironic_context
 from ironic.common import driver_factory
@@ -440,7 +441,8 @@ class BaseConductorManager(object):
                          this would process nodes whose provision_updated_at
                          field value was 60 or more seconds before 'now'.
         :param: provision_state: provision_state that the node is in,
-                                 for the provisioning activity to have failed.
+                                 for the provisioning activity to have failed,
+                                 either one string or a set.
         :param: sort_key: the nodes are sorted based on this key.
         :param: callback_method: the callback method to be invoked in a
                                  spawned thread, for a failed node. This
@@ -457,6 +459,9 @@
             fsm.
 
         """
+        if isinstance(provision_state, six.string_types):
+            provision_state = {provision_state}
+
         node_iter = self.iter_nodes(filters=filters, sort_key=sort_key,
                                     sort_dir='asc')
 
@@ -467,7 +472,7 @@
                 with task_manager.acquire(context, node_uuid,
                                           purpose='node state check') as task:
                     if (task.node.maintenance or
-                            task.node.provision_state != provision_state):
+                            task.node.provision_state not in provision_state):
                         continue
 
                     target_state = (None if not keep_target_state else
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 6565513f7e..15c61242f9 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -1561,15 +1561,24 @@ class ConductorManager(base_manager.BaseConductorManager):
         self._fail_if_in_state(context, filters, states.DEPLOYWAIT,
                                sort_key, callback_method, err_handler)
 
-    @METRICS.timer('ConductorManager._check_deploying_status')
+    @METRICS.timer('ConductorManager._check_orphan_nodes')
     @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval)
-    def _check_deploying_status(self, context):
-        """Periodically checks the status of nodes in DEPLOYING state.
+    def _check_orphan_nodes(self, context):
+        """Periodically checks the status of nodes that were taken over.
 
-        Periodically checks the nodes in DEPLOYING and the state of the
-        conductor deploying them. If we find out that a conductor that
-        was provisioning the node has died we then break release the
-        node and gracefully mark the deployment as failed.
+        Periodically checks the nodes that are managed by this conductor but
+        have a reservation from a conductor that went offline.
+
+        1. Nodes in DEPLOYING state move to DEPLOY FAIL.
+
+        2. Nodes in CLEANING state move to CLEAN FAIL with maintenance set.
+
+        3. Nodes in a transient power state get the power operation aborted.
+
+        4. Reservation is removed.
+
+        The last operation happens even for nodes in maintenance mode;
+        otherwise it's not possible to move them out of maintenance.
 
         :param context: request context.
         """
@@ -1578,12 +1587,14 @@ class ConductorManager(base_manager.BaseConductorManager):
             return
 
         node_iter = self.iter_nodes(
-            fields=['id', 'reservation'],
-            filters={'provision_state': states.DEPLOYING,
-                     'maintenance': False,
-                     'reserved_by_any_of': offline_conductors})
+            fields=['id', 'reservation', 'maintenance', 'provision_state',
+                    'target_power_state'],
+            filters={'reserved_by_any_of': offline_conductors})
 
-        for node_uuid, driver, node_id, conductor_hostname in node_iter:
+        state_cleanup_required = []
+
+        for (node_uuid, driver, node_id, conductor_hostname,
+             maintenance, provision_state, target_power_state) in node_iter:
             # NOTE(lucasagomes): Although very rare, this may lead to a
             # race condition. By the time we release the lock the conductor
             # that was previously managing the node could be back online.
@@ -1604,11 +1615,43 @@ class ConductorManager(base_manager.BaseConductorManager):
                 LOG.warning("During checking for deploying state, when "
                             "releasing the lock of the node %s, it was "
                             "already unlocked.", node_uuid)
+            else:
+                LOG.warning('Forcibly removed reservation of conductor %(old)s'
+                            ' on node %(node)s as that conductor went offline',
+                            {'old': conductor_hostname, 'node': node_uuid})
+
+            # TODO(dtantsur): clean up all states that are not stable and
+            # are not one of WAIT states.
+            if not maintenance and (provision_state in (states.DEPLOYING,
+                                                        states.CLEANING) or
+                                    target_power_state is not None):
+                LOG.debug('Node %(node)s taken over from conductor %(old)s '
+                          'requires state clean up: provision state is '
+                          '%(state)s, target power state is %(pstate)s',
+                          {'node': node_uuid, 'old': conductor_hostname,
+                           'state': provision_state,
+                           'pstate': target_power_state})
+                state_cleanup_required.append(node_uuid)
+
+        for node_uuid in state_cleanup_required:
+            with task_manager.acquire(context, node_uuid,
+                                      purpose='power state clean up') as task:
+                if not task.node.maintenance and task.node.target_power_state:
+                    old_state = task.node.target_power_state
+                    task.node.target_power_state = None
+                    task.node.last_error = _('Pending power operation was '
+                                             'aborted due to conductor take '
+                                             'over')
+                    task.node.save()
+                    LOG.warning('Aborted pending power operation %(op)s '
+                                'on node %(node)s due to conductor take over',
+                                {'op': old_state, 'node': node_uuid})
 
             self._fail_if_in_state(
-                context, {'id': node_id}, states.DEPLOYING,
+                context, {'uuid': node_uuid},
+                {states.DEPLOYING, states.CLEANING},
                 'provision_updated_at',
-                callback_method=utils.cleanup_after_timeout,
+                callback_method=utils.abort_on_conductor_take_over,
                 err_handler=utils.provisioning_error_handler)
 
     @METRICS.timer('ConductorManager._do_adoption')
diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py
index 057d4dcd5c..a23390d5f9 100644
--- a/ironic/conductor/utils.py
+++ b/ironic/conductor/utils.py
@@ -377,6 +377,27 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True,
         task.process_event('fail', target_state=target_state)
 
 
+@task_manager.require_exclusive_lock
+def abort_on_conductor_take_over(task):
+    """Set node's state when a task was aborted due to conductor take over.
+
+    :param task: a TaskManager instance.
+    """
+    msg = _('Operation was aborted due to conductor take over')
+    # By this time the "fail" event was processed, so we cannot end up in
+    # CLEANING or CLEAN WAIT, only in CLEAN FAIL.
+    if task.node.provision_state == states.CLEANFAIL:
+        cleaning_error_handler(task, msg, set_fail_state=False)
+    else:
+        # For aborted deployment (and potentially other operations), just set
+        # the last_error accordingly.
+        task.node.last_error = msg
+        task.node.save()
+
+    LOG.warning('Aborted the current operation on node %s due to '
+                'conductor take over', task.node.uuid)
+
+
 def rescuing_error_handler(task, msg, set_fail_state=True):
     """Cleanup rescue task after timeout or failure.
diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py
index bfb0ba84c4..46ffa8cc77 100644
--- a/ironic/tests/unit/conductor/test_manager.py
+++ b/ironic/tests/unit/conductor/test_manager.py
@@ -6278,16 +6278,17 @@ class DestroyPortgroupTestCase(mgr_utils.ServiceSetUpMixin,
 @mock.patch.object(manager.ConductorManager, '_fail_if_in_state')
 @mock.patch.object(manager.ConductorManager, '_mapped_to_this_conductor')
 @mock.patch.object(dbapi.IMPL, 'get_offline_conductors')
-class ManagerCheckDeployingStatusTestCase(mgr_utils.ServiceSetUpMixin,
-                                          db_base.DbTestCase):
+class ManagerCheckOrphanNodesTestCase(mgr_utils.ServiceSetUpMixin,
+                                      db_base.DbTestCase):
     def setUp(self):
-        super(ManagerCheckDeployingStatusTestCase, self).setUp()
+        super(ManagerCheckOrphanNodesTestCase, self).setUp()
         self._start_service()
 
         self.node = obj_utils.create_test_node(
             self.context, id=1, uuid=uuidutils.generate_uuid(),
             driver='fake', provision_state=states.DEPLOYING,
-            target_provision_state=states.DEPLOYDONE,
+            target_provision_state=states.ACTIVE,
+            target_power_state=states.POWER_ON,
             reservation='fake-conductor')
 
         # create a second node in a different state to test the
@@ -6297,28 +6298,53 @@ class ManagerCheckDeployingStatusTestCase(mgr_utils.ServiceSetUpMixin,
             driver='fake', provision_state=states.AVAILABLE,
             target_provision_state=states.NOSTATE)
 
-    def test__check_deploying_status(self, mock_off_cond, mock_mapped,
-                                     mock_fail_if):
+    def test__check_orphan_nodes(self, mock_off_cond, mock_mapped,
+                                 mock_fail_if):
         mock_off_cond.return_value = ['fake-conductor']
 
-        self.service._check_deploying_status(self.context)
+        self.service._check_orphan_nodes(self.context)
 
         self.node.refresh()
         mock_off_cond.assert_called_once_with()
         mock_mapped.assert_called_once_with(self.node.uuid, 'fake')
         mock_fail_if.assert_called_once_with(
-            mock.ANY, {'id': self.node.id}, states.DEPLOYING,
+            mock.ANY, {'uuid': self.node.uuid},
+            {states.DEPLOYING, states.CLEANING},
             'provision_updated_at',
-            callback_method=conductor_utils.cleanup_after_timeout,
+            callback_method=conductor_utils.abort_on_conductor_take_over,
             err_handler=conductor_utils.provisioning_error_handler)
         # assert node was released
         self.assertIsNone(self.node.reservation)
+        self.assertIsNone(self.node.target_power_state)
+        self.assertIsNotNone(self.node.last_error)
 
-    def test__check_deploying_status_alive(self, mock_off_cond,
-                                           mock_mapped, mock_fail_if):
+    def test__check_orphan_nodes_cleaning(self, mock_off_cond, mock_mapped,
+                                          mock_fail_if):
+        self.node.provision_state = states.CLEANING
+        self.node.save()
+        mock_off_cond.return_value = ['fake-conductor']
+
+        self.service._check_orphan_nodes(self.context)
+
+        self.node.refresh()
+        mock_off_cond.assert_called_once_with()
+        mock_mapped.assert_called_once_with(self.node.uuid, 'fake')
+        mock_fail_if.assert_called_once_with(
+            mock.ANY, {'uuid': self.node.uuid},
+            {states.DEPLOYING, states.CLEANING},
+            'provision_updated_at',
+            callback_method=conductor_utils.abort_on_conductor_take_over,
+            err_handler=conductor_utils.provisioning_error_handler)
+        # assert node was released
+        self.assertIsNone(self.node.reservation)
+        self.assertIsNone(self.node.target_power_state)
+        self.assertIsNotNone(self.node.last_error)
+
+    def test__check_orphan_nodes_alive(self, mock_off_cond,
+                                       mock_mapped, mock_fail_if):
         mock_off_cond.return_value = []
 
-        self.service._check_deploying_status(self.context)
+        self.service._check_orphan_nodes(self.context)
 
         self.node.refresh()
         mock_off_cond.assert_called_once_with()
@@ -6328,7 +6354,7 @@ class ManagerCheckDeployingStatusTestCase(mgr_utils.ServiceSetUpMixin,
         self.assertIsNotNone(self.node.reservation)
 
     @mock.patch.object(objects.Node, 'release')
-    def test__check_deploying_status_release_exceptions_skipping(
+    def test__check_orphan_nodes_release_exceptions_skipping(
             self, mock_release, mock_off_cond, mock_mapped, mock_fail_if):
         mock_off_cond.return_value = ['fake-conductor']
         # Add another node so we can check both exceptions
@@ -6341,7 +6367,7 @@ class ManagerCheckDeployingStatusTestCase(mgr_utils.ServiceSetUpMixin,
         mock_mapped.return_value = True
         mock_release.side_effect = [exception.NodeNotFound('not found'),
                                     exception.NodeLocked('locked')]
-        self.service._check_deploying_status(self.context)
+        self.service._check_orphan_nodes(self.context)
 
         self.node.refresh()
         mock_off_cond.assert_called_once_with()
@@ -6351,22 +6377,52 @@ class ManagerCheckDeployingStatusTestCase(mgr_utils.ServiceSetUpMixin,
         # Assert we skipped and didn't try to call _fail_if_in_state
         self.assertFalse(mock_fail_if.called)
 
-    @mock.patch.object(objects.Node, 'release')
-    def test__check_deploying_status_release_node_not_locked(
-            self, mock_release, mock_off_cond, mock_mapped, mock_fail_if):
+    def test__check_orphan_nodes_release_node_not_locked(
+            self, mock_off_cond, mock_mapped, mock_fail_if):
+        # this simulates releasing the node elsewhere
+        count = [0]
+
+        def _fake_release(*args, **kwargs):
+            self.node.reservation = None
+            self.node.save()
+            # raise an exception only the first time release is called
+            count[0] += 1
+            if count[0] == 1:
+                raise exception.NodeNotLocked('not locked')
+
         mock_off_cond.return_value = ['fake-conductor']
         mock_mapped.return_value = True
-        mock_release.side_effect = exception.NodeNotLocked('not locked')
 
-        self.service._check_deploying_status(self.context)
+        with mock.patch.object(objects.Node, 'release',
+                               side_effect=_fake_release) as mock_release:
+            self.service._check_orphan_nodes(self.context)
+            mock_release.assert_called_with(self.context, mock.ANY,
+                                            self.node.id)
+
+        mock_off_cond.assert_called_once_with()
+        mock_mapped.assert_called_once_with(self.node.uuid, 'fake')
+        mock_fail_if.assert_called_once_with(
+            mock.ANY, {'uuid': self.node.uuid},
+            {states.DEPLOYING, states.CLEANING},
+            'provision_updated_at',
+            callback_method=conductor_utils.abort_on_conductor_take_over,
+            err_handler=conductor_utils.provisioning_error_handler)
+
+    def test__check_orphan_nodes_maintenance(self, mock_off_cond, mock_mapped,
+                                             mock_fail_if):
+        self.node.maintenance = True
+        self.node.save()
+        mock_off_cond.return_value = ['fake-conductor']
+
+        self.service._check_orphan_nodes(self.context)
 
         self.node.refresh()
         mock_off_cond.assert_called_once_with()
         mock_mapped.assert_called_once_with(self.node.uuid, 'fake')
-        mock_fail_if.assert_called_once_with(
-            mock.ANY, {'id': self.node.id}, states.DEPLOYING,
-            'provision_updated_at',
-            callback_method=conductor_utils.cleanup_after_timeout,
-            err_handler=conductor_utils.provisioning_error_handler)
+        # assert node was released
+        self.assertIsNone(self.node.reservation)
+        # not changing states in maintenance
+        self.assertFalse(mock_fail_if.called)
+        self.assertIsNotNone(self.node.target_power_state)
 
 
 class TestIndirectionApiConductor(db_base.DbTestCase):
diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py
index f1d96618c6..e5db5a44af 100644
--- a/ironic/tests/unit/conductor/test_utils.py
+++ b/ironic/tests/unit/conductor/test_utils.py
@@ -1234,6 +1234,25 @@ class ErrorHandlersTestCase(tests_base.TestCase):
         conductor_utils.cleaning_error_handler(self.task, 'foo')
         self.assertTrue(log_mock.exception.called)
 
+    def test_abort_on_conductor_take_over_cleaning(self):
+        self.node.maintenance = False
+        self.node.provision_state = states.CLEANFAIL
+        conductor_utils.abort_on_conductor_take_over(self.task)
+        self.assertTrue(self.node.maintenance)
+        self.assertIn('take over', self.node.maintenance_reason)
+        self.assertIn('take over', self.node.last_error)
+        self.task.driver.deploy.tear_down_cleaning.assert_called_once_with(
+            self.task)
+        self.node.save.assert_called_once_with()
+
+    def test_abort_on_conductor_take_over_deploying(self):
+        self.node.maintenance = False
+        self.node.provision_state = states.DEPLOYFAIL
+        conductor_utils.abort_on_conductor_take_over(self.task)
+        self.assertFalse(self.node.maintenance)
+        self.assertIn('take over', self.node.last_error)
+        self.node.save.assert_called_once_with()
+
     @mock.patch.object(conductor_utils, 'LOG')
     def test_spawn_cleaning_error_handler_no_worker(self, log_mock):
         exc = exception.NoFreeConductorWorker()
diff --git a/releasenotes/notes/orphan-nodes-389cb6d90c2917ec.yaml b/releasenotes/notes/orphan-nodes-389cb6d90c2917ec.yaml
new file mode 100644
index 0000000000..c5c3f1e116
--- /dev/null
+++ b/releasenotes/notes/orphan-nodes-389cb6d90c2917ec.yaml
@@ -0,0 +1,9 @@
+---
+fixes:
+  - |
+    On node take over, any locks that are left from the old conductor are
+    cleared by the new one. Previously this only happened for nodes in
+    ``DEPLOYING`` state.
+  - |
+    On taking over nodes in ``CLEANING`` state, the new conductor moves them
+    to ``CLEAN FAIL`` and sets maintenance.