Fixed cluster state transitions

Added a new cluster status:
* partially_deployed - the cluster's nodes have mixed statuses (only some of them reached the expected status).

Change-Id: I5be8c4ed5f58e2c6545a7f06b8eab1ae37c8575f
Closes-Bug: 1542201
Bulat Gaifullin 2016-02-12 10:41:25 +03:00
parent 9c012b22bb
commit 72499871b7
8 changed files with 111 additions and 49 deletions
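In short, a cluster becomes partially_deployed when a task finishes successfully but only some of its nodes reached the status that the task was expected to produce. A minimal sketch of the idea (the standalone helper and names below are illustrative only; the real check is in the Task.__update_cluster_status hunk further down):

# Illustrative sketch only: derive a cluster status from node statuses.
# The committed logic uses Cluster.get_nodes_count_unmet_status instead.
def resolve_cluster_status(node_statuses, expected_node_status,
                           success_status='operational'):
    """Return 'partially_deployed' if any node missed the expected status."""
    remaining = sum(1 for s in node_statuses if s != expected_node_status)
    return 'partially_deployed' if remaining else success_status

# Example: one node is still only provisioned after a deployment task.
assert resolve_cluster_status(
    ['ready', 'ready', 'provisioned'], 'ready') == 'partially_deployed'
assert resolve_cluster_status(['ready', 'ready'], 'ready') == 'operational'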


@@ -51,6 +51,7 @@ CLUSTER_STATUSES = Enum(
'operational',
'error',
'remove',
'partially_deployed'
)
NETWORKS = Enum(


@@ -51,6 +51,7 @@ cluster_statuses_new = (
'operational',
'error',
'remove',
'partially_deployed'
)
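For reference, consts.Enum maps every listed value to an attribute of the same name, so consts.CLUSTER_STATUSES.partially_deployed resolves to the plain string 'partially_deployed' that the migration tuple above also carries. A simplified stand-in for that helper (a sketch, not the exact implementation; the leading values not visible in the hunk are filled in here for illustration):

import collections

# Simplified stand-in for nailgun.consts.Enum: a namedtuple whose field
# names and values are the same strings.
def Enum(*values):
    return collections.namedtuple('Enum', values)(*values)

CLUSTER_STATUSES = Enum(
    'new', 'deployment', 'stopped', 'operational',
    'error', 'remove', 'partially_deployed',
)

assert CLUSTER_STATUSES.partially_deployed == 'partially_deployed'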


@@ -147,7 +147,8 @@ class Cluster(Base):
@property
def is_locked(self):
allowed_status = (
consts.CLUSTER_STATUSES.new, consts.CLUSTER_STATUSES.stopped
consts.CLUSTER_STATUSES.new, consts.CLUSTER_STATUSES.stopped,
consts.CLUSTER_STATUSES.partially_deployed
)
return self.status not in allowed_status or bool(db().query(
db().query(Node).filter_by(
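The effect of the hunk above: a cluster's settings stay editable (is_locked is False) while its status is new, stopped, or now partially_deployed, unless the truncated Node query finds nodes that are presumably already deployed. A rough restatement under that assumption (the query itself is cut off in the hunk):

# Rough restatement of Cluster.is_locked after this change. The boolean
# 'has_deployed_nodes' stands in for the Node query truncated above.
UNLOCKED_STATUSES = ('new', 'stopped', 'partially_deployed')

def is_locked(cluster_status, has_deployed_nodes):
    return cluster_status not in UNLOCKED_STATUSES or has_deployed_nodes

assert not is_locked('partially_deployed', has_deployed_nodes=False)
assert is_locked('operational', has_deployed_nodes=False)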


@@ -163,12 +163,19 @@ class Task(NailgunObject):
n.error_type = error_type
@classmethod
def __update_cluster_status(cls, cluster, status):
def __update_cluster_status(cls, cluster, status, expected_node_status):
logger.debug(
"Updating cluster (%s) status: from %s to %s",
cluster.full_name, cluster.status, status)
data = {'status': status}
if expected_node_status is not None:
remaining = Cluster.get_nodes_count_unmet_status(
cluster, expected_node_status
)
if remaining > 0:
logger.debug("Detect that cluster '%s' is partially deployed.",
cluster.id)
status = consts.CLUSTER_STATUSES.partially_deployed
# FIXME(aroma): remove updating of 'deployed_before'
# when stop action is reworked. 'deployed_before'
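Cluster.get_nodes_count_unmet_status is added elsewhere in this commit and its body is not part of these hunks; conceptually it counts the cluster's nodes whose status differs from the expected one. A plausible sketch (assumed behaviour, not the committed implementation):

# Assumed behaviour of Cluster.get_nodes_count_unmet_status: the number
# of nodes in the cluster whose status is not the expected one.
def get_nodes_count_unmet_status(cluster, expected_status):
    return sum(1 for node in cluster.nodes
               if node.status != expected_status)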
@@ -178,63 +185,82 @@ class Task(NailgunObject):
if status == consts.CLUSTER_STATUSES.operational:
Cluster.set_deployed_before_flag(cluster, value=True)
Cluster.update(cluster, data)
Cluster.update(cluster, {'status': status})
@classmethod
def _update_cluster_data(cls, instance):
cluster = instance.cluster
if instance.name == 'deploy':
if instance.status == 'ready':
if instance.name == consts.TASK_NAMES.deployment:
if instance.status == consts.TASK_STATUSES.ready:
# If for some reasons orchestrator
# didn't send ready status for node
# we should set it explicitly
for n in cluster.nodes:
if n.status == 'deploying':
n.status = 'ready'
if n.status == consts.NODE_STATUSES.deploying:
n.status = consts.NODE_STATUSES.ready
n.progress = 100
cls.__update_cluster_status(cluster, 'operational')
cls.__update_cluster_status(
cluster,
consts.CLUSTER_STATUSES.operational,
consts.NODE_STATUSES.ready
)
Cluster.clear_pending_changes(cluster)
elif instance.status == 'error' and \
not TaskHelper.before_deployment_error(instance):
# We don't want to set cluster status to
# error because we don't want to lock
# settings if cluster wasn't deployed
cls.__update_cluster_status(cluster, 'error')
elif instance.status == consts.TASK_STATUSES.error:
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.error, None
)
q_nodes_to_error = TaskHelper.get_nodes_to_deployment_error(
cluster
)
cls.__update_nodes_to_error(
q_nodes_to_error, error_type=consts.NODE_ERRORS.deploy
)
elif instance.name == consts.TASK_NAMES.spawn_vms:
if instance.status == consts.TASK_STATUSES.ready:
Cluster.set_vms_created_state(cluster)
elif instance.status == consts.TASK_STATUSES.error and \
not TaskHelper.before_deployment_error(instance):
cls.__update_cluster_status(cluster, 'error')
elif instance.name == 'deployment' and instance.status == 'error':
cls.__update_cluster_status(cluster, 'error')
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.error, None
)
elif instance.name == consts.TASK_NAMES.deploy and \
instance.status == consts.TASK_STATUSES.error and \
not TaskHelper.before_deployment_error(instance):
# We don't want to set cluster status to
# error because we don't want to lock
# settings if cluster wasn't deployed
q_nodes_to_error = \
TaskHelper.get_nodes_to_deployment_error(cluster)
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.error, None
)
cls.__update_nodes_to_error(q_nodes_to_error,
error_type='deploy')
elif instance.name == consts.TASK_NAMES.provision:
if instance.status == consts.TASK_STATUSES.ready:
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.partially_deployed, None
)
elif instance.status == consts.TASK_STATUSES.error:
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.error, None
)
q_nodes_to_error = \
TaskHelper.get_nodes_to_provisioning_error(cluster)
elif instance.name == 'provision' and instance.status == 'error':
cls.__update_cluster_status(cluster, 'error')
q_nodes_to_error = \
TaskHelper.get_nodes_to_provisioning_error(cluster)
cls.__update_nodes_to_error(q_nodes_to_error,
error_type='provision')
elif instance.name == 'stop_deployment':
if instance.status == 'error':
cls.__update_cluster_status(cluster, 'error')
cls.__update_nodes_to_error(
q_nodes_to_error, error_type=consts.NODE_ERRORS.provision)
elif instance.name == consts.TASK_NAMES.stop_deployment:
if instance.status == consts.TASK_STATUSES.error:
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.error, None
)
else:
cls.__update_cluster_status(cluster, 'stopped')
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.stopped, None
)
@classmethod
def _clean_data(cls, data):
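Taken together, the branches above give roughly the following mapping from task outcome to cluster status (a condensed restatement for readability, not code from the commit):

# (task name, task status) -> cluster status set by _update_cluster_data.
# deployment/ready may instead yield 'partially_deployed' when some node
# never reached 'ready'; deploy/error and spawn_vms/error only apply when
# the failure happened after deployment actually started.
TRANSITIONS = {
    ('deployment', 'ready'): 'operational',
    ('deployment', 'error'): 'error',
    ('deploy', 'error'): 'error',
    ('spawn_vms', 'error'): 'error',
    ('provision', 'ready'): 'partially_deployed',
    ('provision', 'error'): 'error',
    ('stop_deployment', 'error'): 'error',
    ('stop_deployment', 'ready'): 'stopped',
}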


@@ -1542,7 +1542,7 @@ class TestConsumer(BaseReciverTestCase):
self.assertEqual(nodes[1].status, consts.NODE_STATUSES.ready)
self.assertEqual(task.status, consts.TASK_STATUSES.ready)
def _check_success_message(self, callback, task_name, node_status):
def _check_success_message(self, callback, task_name, c_status, n_status):
self.env.create(
cluster_kwargs={},
nodes_kwargs=[
@@ -1566,31 +1566,38 @@ class TestConsumer(BaseReciverTestCase):
'task_uuid': task.uuid,
'status': consts.TASK_STATUSES.ready,
'progress': 100,
'nodes': [{'uid': nodes[0].uid, 'status':node_status}]
'nodes': [{'uid': nodes[0].uid, 'status': n_status}]
}
callback(**params)
self.assertEqual(
"{0} of 1 environment node(s) is done.".format(task_title),
task.message
)
self.db.refresh(cluster)
self.assertEqual(
consts.CLUSTER_STATUSES.partially_deployed, cluster.status
)
params['nodes'] = []
callback(**params)
self.assertEqual(
"{0} is done. No changes.".format(task_title),
task.message
)
params['nodes'] = [{'uid': nodes[1].uid, 'status':node_status}]
params['nodes'] = [{'uid': nodes[1].uid, 'status': n_status}]
callback(**params)
self.assertEqual(
"{0} of environment '{1}' is done."
.format(task_title, cluster.name),
task.message
)
self.db.refresh(cluster)
self.assertEqual(c_status, cluster.status)
def test_success_deploy_messsage(self):
self._check_success_message(
self.receiver.deploy_resp,
consts.TASK_NAMES.deployment,
consts.CLUSTER_STATUSES.operational,
consts.NODE_STATUSES.ready
)
@@ -1598,8 +1605,10 @@ class TestConsumer(BaseReciverTestCase):
self._check_success_message(
self.receiver.deploy_resp,
consts.TASK_NAMES.provision,
consts.CLUSTER_STATUSES.partially_deployed,
consts.NODE_STATUSES.provisioned
)
self.assertFalse(self.env.clusters[-1].is_locked)
class TestResetEnvironment(BaseReciverTestCase):


@@ -440,6 +440,17 @@ class TestNodeAttributesMigration(base.BaseAlembicMigrationTest):
self.assertEqual(db_value, '{}')
class TestClusterStatusMigration(base.BaseAlembicMigrationTest):
def test_cluster_status_upgraded(self):
clusters_table = self.meta.tables['clusters']
columns = [clusters_table.c.id, clusters_table.c.status]
cluster = db.execute(sa.select(columns)).fetchone()
db.execute(clusters_table.update().where(
clusters_table.c.id == cluster.id
).values(status=consts.CLUSTER_STATUSES.partially_deployed))
class TestRemoveWizardMetadata(base.BaseAlembicMigrationTest):
def test_wizard_metadata_does_not_exist_in_releases(self):
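The cluster_statuses_new tuple shown earlier belongs to the Alembic migration that widens the clusters.status enum; the test above only checks that a row can be switched to the new value. A generic sketch of such an enum upgrade on PostgreSQL (type and column names, the 'new' default, and the full value list are assumptions here; the real migration uses Nailgun's own helpers):

from alembic import op

def upgrade():
    # Recreate the enum with the extra value and move the column over;
    # the default is dropped and restored around the type change.
    op.execute("ALTER TABLE clusters ALTER COLUMN status DROP DEFAULT")
    op.execute("ALTER TYPE cluster_statuses RENAME TO cluster_statuses_old")
    op.execute(
        "CREATE TYPE cluster_statuses AS ENUM ("
        "'new', 'deployment', 'stopped', 'operational', "
        "'error', 'remove', 'partially_deployed')"
    )
    op.execute(
        "ALTER TABLE clusters ALTER COLUMN status "
        "TYPE cluster_statuses USING status::text::cluster_statuses"
    )
    op.execute("ALTER TABLE clusters ALTER COLUMN status SET DEFAULT 'new'")
    op.execute("DROP TYPE cluster_statuses_old")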


@@ -747,12 +747,15 @@ class TestTaskObject(BaseIntegrationTest):
self._nodes_should_not_be_error(self.cluster.nodes[1:])
def test_update_cluster_to_operational(self):
task = Task(name=consts.TASK_NAMES.deploy,
task = Task(name=consts.TASK_NAMES.deployment,
cluster=self.cluster,
status=consts.TASK_STATUSES.ready)
self.db.add(task)
self.db.flush()
for node in self.env.nodes:
node.status = consts.NODE_STATUSES.ready
objects.Task._update_cluster_data(task)
self.db.flush()
@@ -789,7 +792,7 @@ class TestTaskObject(BaseIntegrationTest):
self.cluster.nodes[0].status = consts.NODE_STATUSES.deploying
self.cluster.nodes[0].progress = 24
task = Task(name=consts.TASK_NAMES.deploy,
task = Task(name=consts.TASK_NAMES.deployment,
cluster=self.cluster,
status=consts.TASK_STATUSES.ready)
self.db.add(task)
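A natural companion to the test above would exercise the new partial path: leave one node below the expected status and check that a successful deployment task leaves the cluster partially_deployed. A sketch following the same fixture pattern (the test name and node manipulation are illustrative, not part of this commit):

def test_update_cluster_to_partially_deployed(self):
    # One node never reaches 'ready', so the cluster should not
    # become operational even though the deployment task succeeded.
    task = Task(name=consts.TASK_NAMES.deployment,
                cluster=self.cluster,
                status=consts.TASK_STATUSES.ready)
    self.db.add(task)
    self.db.flush()

    for node in self.env.nodes[1:]:
        node.status = consts.NODE_STATUSES.ready
    self.env.nodes[0].status = consts.NODE_STATUSES.provisioned

    objects.Task._update_cluster_data(task)
    self.db.flush()

    self.assertEqual(self.cluster.status,
                     consts.CLUSTER_STATUSES.partially_deployed)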


@@ -194,34 +194,44 @@ class TestHelperUpdateClusterStatus(BaseTestCase):
self.nodes_should_not_be_error(self.cluster.nodes[1:])
def test_update_cluster_to_operational(self):
deploy_task = Task(name='deploy', cluster=self.cluster, status='ready')
deploy_task = Task(
name=consts.TASK_NAMES.deployment,
cluster=self.cluster, status=consts.TASK_STATUSES.ready
)
for node in self.env.nodes:
node.status = consts.NODE_STATUSES.ready
self.db.add(deploy_task)
self.db.commit()
objects.Task._update_cluster_data(deploy_task)
self.db.flush()
self.assertEqual(self.cluster.status, 'operational')
self.assertEqual(
self.cluster.status, consts.CLUSTER_STATUSES.operational)
def test_update_if_parent_task_is_ready_all_nodes_should_be_ready(self):
for node in self.cluster.nodes:
node.status = 'ready'
node.status = consts.NODE_STATUSES.ready
node.progress = 100
self.cluster.nodes[0].status = 'deploying'
self.cluster.nodes[0].status = consts.NODE_STATUSES.deploying
self.cluster.nodes[0].progress = 24
deploy_task = Task(name='deploy', cluster=self.cluster, status='ready')
deploy_task = Task(
name=consts.TASK_NAMES.deployment,
cluster=self.cluster, status=consts.TASK_STATUSES.ready
)
self.db.add(deploy_task)
self.db.commit()
objects.Task._update_cluster_data(deploy_task)
self.db.flush()
self.assertEqual(self.cluster.status, 'operational')
self.assertEqual(
self.cluster.status, consts.CLUSTER_STATUSES.operational)
for node in self.cluster.nodes:
self.assertEqual(node.status, 'ready')
self.assertEqual(node.status, consts.NODE_STATUSES.ready)
self.assertEqual(node.progress, 100)
def test_update_cluster_status_if_task_was_already_in_error_status(self):