Stop deployment if one of the critical nodes fails (fail fast)
Details: - add a new node attribute: fail_if_error (boolean); - add a list of critical nodes for multinode and HA modes; - do not mark non-reported nodes as error nodes in the deployment case. Change-Id: I657ea6a02b20505e47527618e14c64fc9b48a6b4 Related-Bug: #1251634
This commit is contained in:
parent
51f32395ee
commit
3d38d0a11d
|
@ -149,8 +149,9 @@ class Node(Base):
|
|||
|
||||
@property
|
||||
def needs_redeploy(self):
|
||||
return (self.status == 'error' or len(self.pending_roles)) and \
|
||||
not self.pending_deletion
|
||||
return (
|
||||
self.status in ['error', 'provisioned'] or
|
||||
len(self.pending_roles)) and not self.pending_deletion
|
||||
|
||||
@property
|
||||
def needs_redeletion(self):
|
||||
|
|
|
@ -69,6 +69,8 @@ def get_nodes_not_for_deletion(cluster):
|
|||
|
||||
class DeploymentMultinodeSerializer(object):
|
||||
|
||||
critical_roles = ['controller', 'ceph-osd', 'primary-mongo']
|
||||
|
||||
@classmethod
|
||||
def serialize(cls, cluster, nodes):
|
||||
"""Method generates facts which
|
||||
|
@ -78,6 +80,7 @@ class DeploymentMultinodeSerializer(object):
|
|||
common_attrs = cls.get_common_attrs(cluster)
|
||||
|
||||
cls.set_deployment_priorities(nodes)
|
||||
cls.set_critical_nodes(cluster, nodes)
|
||||
|
||||
return [dict_merge(node, common_attrs) for node in nodes]
|
||||
|
||||
|
@ -87,9 +90,7 @@ class DeploymentMultinodeSerializer(object):
|
|||
attrs = objects.Attributes.merged_attrs_values(
|
||||
cluster.attributes
|
||||
)
|
||||
release = objects.Release.get_by_uid(cluster.pending_release_id) \
|
||||
if cluster.status == consts.CLUSTER_STATUSES.update \
|
||||
else cluster.release
|
||||
release = cls.current_release(cluster)
|
||||
attrs['deployment_mode'] = cluster.mode
|
||||
attrs['deployment_id'] = cluster.id
|
||||
attrs['openstack_version'] = release.version
|
||||
|
@ -113,6 +114,13 @@ class DeploymentMultinodeSerializer(object):
|
|||
|
||||
return attrs
|
||||
|
||||
@classmethod
|
||||
def current_release(cls, cluster):
|
||||
"""Actual cluster release."""
|
||||
return objects.Release.get_by_uid(cluster.pending_release_id) \
|
||||
if cluster.status == consts.CLUSTER_STATUSES.update \
|
||||
else cluster.release
|
||||
|
||||
@classmethod
|
||||
def set_storage_parameters(cls, cluster, attrs):
|
||||
"""Generate pg_num as the number of OSDs across the cluster
|
||||
|
@ -187,6 +195,14 @@ class DeploymentMultinodeSerializer(object):
|
|||
'zabbix-server']):
|
||||
n['priority'] = other_nodes_prior
|
||||
|
||||
@classmethod
|
||||
def set_critical_nodes(cls, cluster, nodes):
|
||||
"""Set behavior on nodes deployment error
|
||||
during deployment process.
|
||||
"""
|
||||
for n in nodes:
|
||||
n['fail_if_error'] = n['role'] in cls.critical_roles
|
||||
|
||||
@classmethod
|
||||
def serialize_nodes(cls, nodes):
|
||||
"""Serialize node for each role.
|
||||
|
@ -316,6 +332,11 @@ class DeploymentMultinodeSerializer(object):
|
|||
class DeploymentHASerializer(DeploymentMultinodeSerializer):
|
||||
"""Serializer for ha mode."""
|
||||
|
||||
critical_roles = ['primary-controller',
|
||||
'primary-mongo',
|
||||
'primary-swift-proxy',
|
||||
'ceph-osd']
|
||||
|
||||
@classmethod
|
||||
def serialize_nodes(cls, nodes):
|
||||
"""Serialize nodes and set primary-controller
|
||||
|
|
|
@ -140,7 +140,7 @@ class TaskHelper(object):
|
|||
def get_nodes_to_deployment_error(cls, cluster):
|
||||
q_nodes_to_error = db().query(Node).\
|
||||
filter(Node.cluster == cluster).\
|
||||
filter(Node.status.in_(['provisioned', 'deploying']))
|
||||
filter(Node.status.in_(['deploying']))
|
||||
|
||||
return q_nodes_to_error
|
||||
|
||||
|
|
|
@ -643,7 +643,7 @@ class CheckBeforeDeploymentTask(object):
|
|||
def _is_disk_checking_required(cls, node):
|
||||
"""Disk checking required in case if node is not provisioned.
|
||||
"""
|
||||
if node.status in ('ready', 'deploying') or \
|
||||
if node.status in ('ready', 'deploying', 'provisioned') or \
|
||||
(node.status == 'error' and node.error_type != 'provision'):
|
||||
return False
|
||||
|
||||
|
|
|
@ -171,11 +171,19 @@ class TestHandlers(BaseIntegrationTest):
|
|||
'compute': 700
|
||||
}
|
||||
|
||||
critical_mapping = {
|
||||
'primary-controller': True,
|
||||
'controller': False,
|
||||
'cinder': False,
|
||||
'compute': False
|
||||
}
|
||||
|
||||
deployment_info = []
|
||||
for node in nodes_db:
|
||||
ips = assigned_ips[node.id]
|
||||
for role in sorted(node.roles):
|
||||
priority = priority_mapping[role]
|
||||
is_critical = critical_mapping[role]
|
||||
if isinstance(priority, list):
|
||||
priority = priority.pop()
|
||||
|
||||
|
@ -184,6 +192,7 @@ class TestHandlers(BaseIntegrationTest):
|
|||
'status': node.status,
|
||||
'role': role,
|
||||
'online': node.online,
|
||||
'fail_if_error': is_critical,
|
||||
'fqdn': 'node-%d.%s' % (node.id, settings.DNS_DOMAIN),
|
||||
'priority': priority,
|
||||
|
||||
|
@ -219,6 +228,7 @@ class TestHandlers(BaseIntegrationTest):
|
|||
lambda node: node['role'] == 'controller',
|
||||
deployment_info)
|
||||
controller_nodes[0]['role'] = 'primary-controller'
|
||||
controller_nodes[0]['fail_if_error'] = True
|
||||
|
||||
supertask = self.env.launch_deployment()
|
||||
deploy_task_uuid = [x.uuid for x in supertask.subtasks
|
||||
|
@ -515,11 +525,20 @@ class TestHandlers(BaseIntegrationTest):
|
|||
'cinder': 700,
|
||||
'compute': 700
|
||||
}
|
||||
|
||||
critical_mapping = {
|
||||
'primary-controller': True,
|
||||
'controller': False,
|
||||
'cinder': False,
|
||||
'compute': False
|
||||
}
|
||||
|
||||
deployment_info = []
|
||||
for node in nodes_db:
|
||||
ips = assigned_ips[node.id]
|
||||
for role in sorted(node.roles):
|
||||
priority = priority_mapping[role]
|
||||
is_critical = critical_mapping[role]
|
||||
if isinstance(priority, list):
|
||||
priority = priority.pop()
|
||||
|
||||
|
@ -528,6 +547,7 @@ class TestHandlers(BaseIntegrationTest):
|
|||
'status': node.status,
|
||||
'role': role,
|
||||
'online': node.online,
|
||||
'fail_if_error': is_critical,
|
||||
'fqdn': 'node-%d.%s' % (node.id, settings.DNS_DOMAIN),
|
||||
'priority': priority,
|
||||
|
||||
|
@ -621,6 +641,7 @@ class TestHandlers(BaseIntegrationTest):
|
|||
lambda node: node['role'] == 'controller',
|
||||
deployment_info)
|
||||
controller_nodes[0]['role'] = 'primary-controller'
|
||||
controller_nodes[0]['fail_if_error'] = True
|
||||
|
||||
supertask = self.env.launch_deployment()
|
||||
deploy_task_uuid = [x.uuid for x in supertask.subtasks
|
||||
|
|
|
@ -281,6 +281,27 @@ class TestNovaOrchestratorSerializer(OrchestratorSerializerTestBase):
|
|||
]
|
||||
self.assertEqual(expected_priorities, nodes)
|
||||
|
||||
def test_set_critital_node(self):
|
||||
nodes = [
|
||||
{'role': 'mongo'},
|
||||
{'role': 'mongo'},
|
||||
{'role': 'primary-mongo'},
|
||||
{'role': 'controller'},
|
||||
{'role': 'ceph-osd'},
|
||||
{'role': 'other'}
|
||||
]
|
||||
serializer = DeploymentMultinodeSerializer()
|
||||
serializer.set_critical_nodes(self.cluster, nodes)
|
||||
expected_ciritial_roles = [
|
||||
{'role': 'mongo', 'fail_if_error': False},
|
||||
{'role': 'mongo', 'fail_if_error': False},
|
||||
{'role': 'primary-mongo', 'fail_if_error': True},
|
||||
{'role': 'controller', 'fail_if_error': True},
|
||||
{'role': 'ceph-osd', 'fail_if_error': True},
|
||||
{'role': 'other', 'fail_if_error': False}
|
||||
]
|
||||
self.assertEqual(expected_ciritial_roles, nodes)
|
||||
|
||||
|
||||
class TestNovaOrchestratorHASerializer(OrchestratorSerializerTestBase):
|
||||
|
||||
|
@ -382,6 +403,36 @@ class TestNovaOrchestratorHASerializer(OrchestratorSerializerTestBase):
|
|||
]
|
||||
self.assertEqual(expected_priorities, nodes)
|
||||
|
||||
def test_set_critital_node(self):
|
||||
nodes = [
|
||||
{'role': 'zabbix-server'},
|
||||
{'role': 'primary-swift-proxy'},
|
||||
{'role': 'swift-proxy'},
|
||||
{'role': 'storage'},
|
||||
{'role': 'mongo'},
|
||||
{'role': 'primary-mongo'},
|
||||
{'role': 'primary-controller'},
|
||||
{'role': 'controller'},
|
||||
{'role': 'controller'},
|
||||
{'role': 'ceph-osd'},
|
||||
{'role': 'other'}
|
||||
]
|
||||
self.serializer.set_critical_nodes(self.cluster, nodes)
|
||||
expected_ciritial_roles = [
|
||||
{'role': 'zabbix-server', 'fail_if_error': False},
|
||||
{'role': 'primary-swift-proxy', 'fail_if_error': True},
|
||||
{'role': 'swift-proxy', 'fail_if_error': False},
|
||||
{'role': 'storage', 'fail_if_error': False},
|
||||
{'role': 'mongo', 'fail_if_error': False},
|
||||
{'role': 'primary-mongo', 'fail_if_error': True},
|
||||
{'role': 'primary-controller', 'fail_if_error': True},
|
||||
{'role': 'controller', 'fail_if_error': False},
|
||||
{'role': 'controller', 'fail_if_error': False},
|
||||
{'role': 'ceph-osd', 'fail_if_error': True},
|
||||
{'role': 'other', 'fail_if_error': False}
|
||||
]
|
||||
self.assertEqual(expected_ciritial_roles, nodes)
|
||||
|
||||
def test_set_primary_controller_priority_not_depend_on_nodes_order(self):
|
||||
controllers = filter(lambda n: 'controller' in n.roles, self.env.nodes)
|
||||
expected_primary_controller = sorted(
|
||||
|
|
|
@ -194,6 +194,9 @@ class TestCheckBeforeDeploymentTask(BaseTestCase):
|
|||
self.set_node_status('discover')
|
||||
self.assertTrue(self.is_checking_required())
|
||||
|
||||
self.set_node_status('provisioned')
|
||||
self.assertFalse(self.is_checking_required())
|
||||
|
||||
def test_is_disk_checking_required_in_case_of_error(self):
|
||||
self.set_node_status('error')
|
||||
self.set_node_error_type('provision')
|
||||
|
|
|
@ -98,6 +98,27 @@ class TestTaskHelpers(BaseTestCase):
|
|||
computes = self.filter_by_role(nodes, 'compute')
|
||||
self.assertEqual(len(computes), 1)
|
||||
|
||||
def test_redeploy_with_critial_roles(self):
|
||||
cluster = self.create_env([
|
||||
{'roles': ['controller'], 'status': 'error'},
|
||||
{'roles': ['controller'], 'status': 'provisioned'},
|
||||
{'roles': ['controller'], 'status': 'provisioned'},
|
||||
{'roles': ['compute', 'cinder'], 'status': 'provisioned'},
|
||||
{'roles': ['compute'], 'status': 'provisioned'},
|
||||
{'roles': ['cinder'], 'status': 'provisioned'}])
|
||||
|
||||
nodes = TaskHelper.nodes_to_deploy(cluster)
|
||||
self.assertEqual(len(nodes), 6)
|
||||
|
||||
controllers = self.filter_by_role(nodes, 'controller')
|
||||
self.assertEqual(len(controllers), 3)
|
||||
|
||||
cinders = self.filter_by_role(nodes, 'cinder')
|
||||
self.assertEqual(len(cinders), 2)
|
||||
|
||||
computes = self.filter_by_role(nodes, 'compute')
|
||||
self.assertEqual(len(computes), 2)
|
||||
|
||||
# TODO(aroma): move it to utils testing code
|
||||
def test_recalculate_deployment_task_progress(self):
|
||||
cluster = self.create_env([
|
||||
|
|
Loading…
Reference in New Issue