Fixed cluster state transitions

Added a new cluster status:
* partially_deployed - the cluster's nodes have mixed statuses (only some of them reached the expected status).

Change-Id: I5be8c4ed5f58e2c6545a7f06b8eab1ae37c8575f
Closes-Bug: 1542201
Bulat Gaifullin 2016-02-12 10:41:25 +03:00
parent 9c012b22bb
commit 72499871b7
8 changed files with 111 additions and 49 deletions
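In short, a cluster becomes partially_deployed when a task finishes successfully but only some of its nodes reached the status that the task was expected to produce. A minimal sketch of the idea (the standalone helper and names below are illustrative only; the real check is in the Task.__update_cluster_status hunk further down):

# Illustrative sketch only: derive a cluster status from node statuses.
# The committed logic uses Cluster.get_nodes_count_unmet_status instead.
def resolve_cluster_status(node_statuses, expected_node_status,
                           success_status='operational'):
    """Return 'partially_deployed' if any node missed the expected status."""
    remaining = sum(1 for s in node_statuses if s != expected_node_status)
    return 'partially_deployed' if remaining else success_status

# Example: one node is still only provisioned after a deployment task.
assert resolve_cluster_status(
    ['ready', 'ready', 'provisioned'], 'ready') == 'partially_deployed'
assert resolve_cluster_status(['ready', 'ready'], 'ready') == 'operational'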


@@ -51,6 +51,7 @@ CLUSTER_STATUSES = Enum(
'operational',
'error',
'remove',
'partially_deployed'
)
NETWORKS = Enum(


@@ -51,6 +51,7 @@ cluster_statuses_new = (
'operational',
'error',
'remove',
'partially_deployed'
)
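For reference, consts.Enum maps every listed value to an attribute of the same name, so consts.CLUSTER_STATUSES.partially_deployed resolves to the plain string 'partially_deployed' that the migration tuple above also carries. A simplified stand-in for that helper (a sketch, not the exact implementation; the leading values not visible in the hunk are filled in here for illustration):

import collections

# Simplified stand-in for nailgun.consts.Enum: a namedtuple whose field
# names and values are the same strings.
def Enum(*values):
    return collections.namedtuple('Enum', values)(*values)

CLUSTER_STATUSES = Enum(
    'new', 'deployment', 'stopped', 'operational',
    'error', 'remove', 'partially_deployed',
)

assert CLUSTER_STATUSES.partially_deployed == 'partially_deployed'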


@@ -147,7 +147,8 @@ class Cluster(Base):
@property
def is_locked(self):
allowed_status = (
consts.CLUSTER_STATUSES.new, consts.CLUSTER_STATUSES.stopped
consts.CLUSTER_STATUSES.new, consts.CLUSTER_STATUSES.stopped,
consts.CLUSTER_STATUSES.partially_deployed
)
return self.status not in allowed_status or bool(db().query(
db().query(Node).filter_by(
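The effect of the hunk above: a cluster's settings stay editable (is_locked is False) while its status is new, stopped, or now partially_deployed, unless the truncated Node query finds nodes that are presumably already deployed. A rough restatement under that assumption (the query itself is cut off in the hunk):

# Rough restatement of Cluster.is_locked after this change. The boolean
# 'has_deployed_nodes' stands in for the Node query truncated above.
UNLOCKED_STATUSES = ('new', 'stopped', 'partially_deployed')

def is_locked(cluster_status, has_deployed_nodes):
    return cluster_status not in UNLOCKED_STATUSES or has_deployed_nodes

assert not is_locked('partially_deployed', has_deployed_nodes=False)
assert is_locked('operational', has_deployed_nodes=False)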


@@ -163,12 +163,19 @@ class Task(NailgunObject):
n.error_type = error_type
@classmethod
def __update_cluster_status(cls, cluster, status):
def __update_cluster_status(cls, cluster, status, expected_node_status):
logger.debug(
"Updating cluster (%s) status: from %s to %s",
cluster.full_name, cluster.status, status)
data = {'status': status}
if expected_node_status is not None:
remaining = Cluster.get_nodes_count_unmet_status(
cluster, expected_node_status
)
if remaining > 0:
logger.debug("Detect that cluster '%s' is partially deployed.",
cluster.id)
status = consts.CLUSTER_STATUSES.partially_deployed
# FIXME(aroma): remove updating of 'deployed_before'
# when stop action is reworked. 'deployed_before'
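Cluster.get_nodes_count_unmet_status is added elsewhere in this commit and its body is not part of these hunks; conceptually it counts the cluster's nodes whose status differs from the expected one. A plausible sketch (assumed behaviour, not the committed implementation):

# Assumed behaviour of Cluster.get_nodes_count_unmet_status: the number
# of nodes in the cluster whose status is not the expected one.
def get_nodes_count_unmet_status(cluster, expected_status):
    return sum(1 for node in cluster.nodes
               if node.status != expected_status)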
@@ -178,63 +185,82 @@ class Task(NailgunObject):
if status == consts.CLUSTER_STATUSES.operational:
Cluster.set_deployed_before_flag(cluster, value=True)
Cluster.update(cluster, data)
Cluster.update(cluster, {'status': status})
@classmethod
def _update_cluster_data(cls, instance):
cluster = instance.cluster
if instance.name == 'deploy':
if instance.status == 'ready':
if instance.name == consts.TASK_NAMES.deployment:
if instance.status == consts.TASK_STATUSES.ready:
# If for some reasons orchestrator
# didn't send ready status for node
# we should set it explicitly
for n in cluster.nodes:
if n.status == 'deploying':
n.status = 'ready'
if n.status == consts.NODE_STATUSES.deploying:
n.status = consts.NODE_STATUSES.ready
n.progress = 100
cls.__update_cluster_status(cluster, 'operational')
cls.__update_cluster_status(
cluster,
consts.CLUSTER_STATUSES.operational,
consts.NODE_STATUSES.ready
)
Cluster.clear_pending_changes(cluster)
elif instance.status == 'error' and \
not TaskHelper.before_deployment_error(instance):
# We don't want to set cluster status to
# error because we don't want to lock
# settings if cluster wasn't deployed
cls.__update_cluster_status(cluster, 'error')
elif instance.status == consts.TASK_STATUSES.error:
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.error, None
)
q_nodes_to_error = TaskHelper.get_nodes_to_deployment_error(
cluster
)
cls.__update_nodes_to_error(
q_nodes_to_error, error_type=consts.NODE_ERRORS.deploy
)
elif instance.name == consts.TASK_NAMES.spawn_vms:
if instance.status == consts.TASK_STATUSES.ready:
Cluster.set_vms_created_state(cluster)
elif instance.status == consts.TASK_STATUSES.error and \
not TaskHelper.before_deployment_error(instance):
cls.__update_cluster_status(cluster, 'error')
elif instance.name == 'deployment' and instance.status == 'error':
cls.__update_cluster_status(cluster, 'error')
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.error, None
)
elif instance.name == consts.TASK_NAMES.deploy and \
instance.status == consts.TASK_STATUSES.error and \
not TaskHelper.before_deployment_error(instance):
# We don't want to set cluster status to
# error because we don't want to lock
# settings if cluster wasn't deployed
q_nodes_to_error = \
TaskHelper.get_nodes_to_deployment_error(cluster)
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.error, None
)
cls.__update_nodes_to_error(q_nodes_to_error,
error_type='deploy')
elif instance.name == consts.TASK_NAMES.provision:
if instance.status == consts.TASK_STATUSES.ready:
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.partially_deployed, None
)
elif instance.status == consts.TASK_STATUSES.error:
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.error, None
)
q_nodes_to_error = \
TaskHelper.get_nodes_to_provisioning_error(cluster)
elif instance.name == 'provision' and instance.status == 'error':
cls.__update_cluster_status(cluster, 'error')
q_nodes_to_error = \
TaskHelper.get_nodes_to_provisioning_error(cluster)
cls.__update_nodes_to_error(q_nodes_to_error,
error_type='provision')
elif instance.name == 'stop_deployment':
if instance.status == 'error':
cls.__update_cluster_status(cluster, 'error')
cls.__update_nodes_to_error(
q_nodes_to_error, error_type=consts.NODE_ERRORS.provision)
elif instance.name == consts.TASK_NAMES.stop_deployment:
if instance.status == consts.TASK_STATUSES.error:
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.error, None
)
else:
cls.__update_cluster_status(cluster, 'stopped')
cls.__update_cluster_status(
cluster, consts.CLUSTER_STATUSES.stopped, None
)
@classmethod
def _clean_data(cls, data):
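Taken together, the branches above give roughly the following mapping from task outcome to cluster status (a condensed restatement for readability, not code from the commit):

# (task name, task status) -> cluster status set by _update_cluster_data.
# deployment/ready may instead yield 'partially_deployed' when some node
# never reached 'ready'; deploy/error and spawn_vms/error only apply when
# the failure happened after deployment actually started.
TRANSITIONS = {
    ('deployment', 'ready'): 'operational',
    ('deployment', 'error'): 'error',
    ('deploy', 'error'): 'error',
    ('spawn_vms', 'error'): 'error',
    ('provision', 'ready'): 'partially_deployed',
    ('provision', 'error'): 'error',
    ('stop_deployment', 'error'): 'error',
    ('stop_deployment', 'ready'): 'stopped',
}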


@@ -1542,7 +1542,7 @@ class TestConsumer(BaseReciverTestCase):
self.assertEqual(nodes[1].status, consts.NODE_STATUSES.ready)
self.assertEqual(task.status, consts.TASK_STATUSES.ready)
def _check_success_message(self, callback, task_name, node_status):
def _check_success_message(self, callback, task_name, c_status, n_status):
self.env.create(
cluster_kwargs={},
nodes_kwargs=[
@@ -1566,31 +1566,38 @@ class TestConsumer(BaseReciverTestCase):
'task_uuid': task.uuid,
'status': consts.TASK_STATUSES.ready,
'progress': 100,
'nodes': [{'uid': nodes[0].uid, 'status':node_status}]
'nodes': [{'uid': nodes[0].uid, 'status': n_status}]
}
callback(**params)
self.assertEqual(
"{0} of 1 environment node(s) is done.".format(task_title),
task.message
)
self.db.refresh(cluster)
self.assertEqual(
consts.CLUSTER_STATUSES.partially_deployed, cluster.status
)
params['nodes'] = []
callback(**params)
self.assertEqual(
"{0} is done. No changes.".format(task_title),
task.message
)
params['nodes'] = [{'uid': nodes[1].uid, 'status':node_status}]
params['nodes'] = [{'uid': nodes[1].uid, 'status': n_status}]
callback(**params)
self.assertEqual(
"{0} of environment '{1}' is done."
.format(task_title, cluster.name),
task.message
)
self.db.refresh(cluster)
self.assertEqual(c_status, cluster.status)
def test_success_deploy_messsage(self):
self._check_success_message(
self.receiver.deploy_resp,
consts.TASK_NAMES.deployment,
consts.CLUSTER_STATUSES.operational,
consts.NODE_STATUSES.ready
)
@@ -1598,8 +1605,10 @@ class TestConsumer(BaseReciverTestCase):
self._check_success_message(
self.receiver.deploy_resp,
consts.TASK_NAMES.provision,
consts.CLUSTER_STATUSES.partially_deployed,
consts.NODE_STATUSES.provisioned
)
self.assertFalse(self.env.clusters[-1].is_locked)
class TestResetEnvironment(BaseReciverTestCase):


@@ -440,6 +440,17 @@ class TestNodeAttributesMigration(base.BaseAlembicMigrationTest):
self.assertEqual(db_value, '{}')
class TestClusterStatusMigration(base.BaseAlembicMigrationTest):
def test_cluster_status_upgraded(self):
clusters_table = self.meta.tables['clusters']
columns = [clusters_table.c.id, clusters_table.c.status]
cluster = db.execute(sa.select(columns)).fetchone()
db.execute(clusters_table.update().where(
clusters_table.c.id == cluster.id
).values(status=consts.CLUSTER_STATUSES.partially_deployed))
class TestRemoveWizardMetadata(base.BaseAlembicMigrationTest):
def test_wizard_metadata_does_not_exist_in_releases(self):
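The cluster_statuses_new tuple shown earlier belongs to the Alembic migration that widens the clusters.status enum; the test above only checks that a row can be switched to the new value. A generic sketch of such an enum upgrade on PostgreSQL (type and column names, the 'new' default, and the full value list are assumptions here; the real migration uses Nailgun's own helpers):

from alembic import op

def upgrade():
    # Recreate the enum with the extra value and move the column over;
    # the default is dropped and restored around the type change.
    op.execute("ALTER TABLE clusters ALTER COLUMN status DROP DEFAULT")
    op.execute("ALTER TYPE cluster_statuses RENAME TO cluster_statuses_old")
    op.execute(
        "CREATE TYPE cluster_statuses AS ENUM ("
        "'new', 'deployment', 'stopped', 'operational', "
        "'error', 'remove', 'partially_deployed')"
    )
    op.execute(
        "ALTER TABLE clusters ALTER COLUMN status "
        "TYPE cluster_statuses USING status::text::cluster_statuses"
    )
    op.execute("ALTER TABLE clusters ALTER COLUMN status SET DEFAULT 'new'")
    op.execute("DROP TYPE cluster_statuses_old")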


@@ -747,12 +747,15 @@ class TestTaskObject(BaseIntegrationTest):
self._nodes_should_not_be_error(self.cluster.nodes[1:])
def test_update_cluster_to_operational(self):
task = Task(name=consts.TASK_NAMES.deploy,
task = Task(name=consts.TASK_NAMES.deployment,
cluster=self.cluster,
status=consts.TASK_STATUSES.ready)
self.db.add(task)
self.db.flush()
for node in self.env.nodes:
node.status = consts.NODE_STATUSES.ready
objects.Task._update_cluster_data(task)
self.db.flush()
@@ -789,7 +792,7 @@ class TestTaskObject(BaseIntegrationTest):
self.cluster.nodes[0].status = consts.NODE_STATUSES.deploying
self.cluster.nodes[0].progress = 24
task = Task(name=consts.TASK_NAMES.deploy,
task = Task(name=consts.TASK_NAMES.deployment,
cluster=self.cluster,
status=consts.TASK_STATUSES.ready)
self.db.add(task)
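A natural companion to the test above would exercise the new partial path: leave one node below the expected status and check that a successful deployment task leaves the cluster partially_deployed. A sketch following the same fixture pattern (the test name and node manipulation are illustrative, not part of this commit):

def test_update_cluster_to_partially_deployed(self):
    # One node never reaches 'ready', so the cluster should not
    # become operational even though the deployment task succeeded.
    task = Task(name=consts.TASK_NAMES.deployment,
                cluster=self.cluster,
                status=consts.TASK_STATUSES.ready)
    self.db.add(task)
    self.db.flush()

    for node in self.env.nodes[1:]:
        node.status = consts.NODE_STATUSES.ready
    self.env.nodes[0].status = consts.NODE_STATUSES.provisioned

    objects.Task._update_cluster_data(task)
    self.db.flush()

    self.assertEqual(self.cluster.status,
                     consts.CLUSTER_STATUSES.partially_deployed)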


@@ -194,34 +194,44 @@ class TestHelperUpdateClusterStatus(BaseTestCase):
self.nodes_should_not_be_error(self.cluster.nodes[1:])
def test_update_cluster_to_operational(self):
deploy_task = Task(name='deploy', cluster=self.cluster, status='ready')
deploy_task = Task(
name=consts.TASK_NAMES.deployment,
cluster=self.cluster, status=consts.TASK_STATUSES.ready
)
for node in self.env.nodes:
node.status = consts.NODE_STATUSES.ready
self.db.add(deploy_task)
self.db.commit()
objects.Task._update_cluster_data(deploy_task)
self.db.flush()
self.assertEqual(self.cluster.status, 'operational')
self.assertEqual(
self.cluster.status, consts.CLUSTER_STATUSES.operational)
def test_update_if_parent_task_is_ready_all_nodes_should_be_ready(self):
for node in self.cluster.nodes:
node.status = 'ready'
node.status = consts.NODE_STATUSES.ready
node.progress = 100
self.cluster.nodes[0].status = 'deploying'
self.cluster.nodes[0].status = consts.NODE_STATUSES.deploying
self.cluster.nodes[0].progress = 24
deploy_task = Task(name='deploy', cluster=self.cluster, status='ready')
deploy_task = Task(
name=consts.TASK_NAMES.deployment,
cluster=self.cluster, status=consts.TASK_STATUSES.ready
)
self.db.add(deploy_task)
self.db.commit()
objects.Task._update_cluster_data(deploy_task)
self.db.flush()
self.assertEqual(self.cluster.status, 'operational')
self.assertEqual(
self.cluster.status, consts.CLUSTER_STATUSES.operational)
for node in self.cluster.nodes:
self.assertEqual(node.status, 'ready')
self.assertEqual(node.status, consts.NODE_STATUSES.ready)
self.assertEqual(node.progress, 100)
def test_update_cluster_status_if_task_was_already_in_error_status(self):