Fault tolerance for provision

Generate a list of nodes which can fail during provisioning.
The user can set what percentage of these nodes may fail before provisioning fails.
By default it is 2%.

DocImpact
blueprint: 200-nodes-support

Change-Id: Ic0540e246f8f2e0361533d52c6bb0f3689d4dd00
This commit is contained in:
Łukasz Oleś 2015-03-07 13:44:33 +01:00
parent a9a2a0f948
commit 2fe290cd2a
4 changed files with 75 additions and 5 deletions

View File

@ -41,6 +41,7 @@
description: "A compute node creates, manages and terminates virtual machine instances."
limits:
recommended: 1
fault_tolerance: "2%"
cinder:
# NOTE: naming, see https://bugs.launchpad.net/fuel/+bug/1383224
name: "Storage - Cinder"

View File

@ -49,12 +49,14 @@ class ProvisioningSerializer(object):
serialized_nodes.extend(
cls.serialize_nodes(cluster_attrs, node_group))
serialized_info = (cluster.replaced_provisioning_info or
cls.serialize_cluster_info(cluster_attrs))
cls.serialize_cluster_info(cluster_attrs, nodes))
serialized_info['fault_tolerance'] = cls.fault_tolerance(cluster,
nodes)
serialized_info['nodes'] = serialized_nodes
return serialized_info
@classmethod
def serialize_cluster_info(cls, cluster_attrs):
def serialize_cluster_info(cls, cluster_attrs, nodes):
return {
'engine': {
'url': settings.COBBLER_URL,
@ -228,6 +230,25 @@ class ProvisioningSerializer(object):
logger.info(u'Node %s seems booted with real system', node.full_name)
return settings.PATH_TO_SSH_KEY
@classmethod
def fault_tolerance(cls, cluster, nodes):
    """Build the list of node groups that are allowed to fail.

    For every role in ``cluster.release.roles_metadata`` that declares a
    ``fault_tolerance`` value of the form ``"<int>%"``, collect the uids
    of the given nodes that carry this role together with the allowed
    failure percentage.

    :param cluster: object exposing ``release.roles_metadata`` (a mapping
        of role name to that role's metadata mapping)
    :param nodes: iterable of nodes exposing ``roles`` and ``uid``
    :returns: list of ``{'uids': [...], 'percentage': <int>}`` dicts, one
        per role with a supported ``fault_tolerance`` value; a role with
        no matching nodes still yields an entry with an empty uid list
    :raises ValueError: if a ``%``-suffixed value has a non-integer
        prefix (e.g. ``"abc%"``)
    """
    may_fail = []
    for role, role_meta in cluster.release.roles_metadata.items():
        if 'fault_tolerance' not in role_meta:
            continue
        tolerance = role_meta['fault_tolerance']
        # Only percentage strings are supported for now. Any other form
        # is skipped, including non-string values such as an unquoted
        # YAML number, which has no ``endswith`` and used to crash here.
        try:
            is_percentage = tolerance.endswith('%')
        except AttributeError:
            is_percentage = False
        if not is_percentage:
            continue
        may_fail.append({
            'uids': [node.uid for node in nodes if role in node.roles],
            'percentage': int(tolerance[:-1]),
        })
    return may_fail
class ProvisioningSerializer61(ProvisioningSerializer):

View File

@ -346,6 +346,7 @@ class TestHandlers(BaseIntegrationTest):
'password': settings.COBBLER_PASSWORD,
'master_ip': settings.MASTER_IP,
},
'fault_tolerance': [],
'nodes': provision_nodes}}}
args, kwargs = nailgun.task.manager.rpc.cast.call_args
@ -360,7 +361,9 @@ class TestHandlers(BaseIntegrationTest):
'storage_address',
'ipaddr',
'IP',
'tasks'])
'tasks',
'uids',
'percentage'])
self.datadiff(
args[1][1],
deployment_msg,
@ -780,6 +783,7 @@ class TestHandlers(BaseIntegrationTest):
'password': settings.COBBLER_PASSWORD,
'master_ip': settings.MASTER_IP,
},
'fault_tolerance': [],
'nodes': provision_nodes}}}
args, kwargs = nailgun.task.manager.rpc.cast.call_args
@ -794,7 +798,9 @@ class TestHandlers(BaseIntegrationTest):
'storage_address',
'ipaddr',
'IP',
'tasks'])
'tasks',
'uids',
'percentage'])
self.datadiff(
args[1][1],
deployment_msg,
@ -1231,6 +1237,7 @@ class TestHandlers(BaseIntegrationTest):
'password': settings.COBBLER_PASSWORD,
'master_ip': settings.MASTER_IP,
},
'fault_tolerance': [],
'nodes': provision_nodes}}}
args, kwargs = nailgun.task.manager.rpc.cast.call_args
@ -1245,7 +1252,9 @@ class TestHandlers(BaseIntegrationTest):
'storage_address',
'ipaddr',
'IP',
'tasks'])
'tasks',
'uids',
'percentage'])
self.datadiff(
args[1][1],
deployment_msg,

View File

@ -0,0 +1,39 @@
# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from nailgun.db.sqlalchemy.models import Cluster
from nailgun.orchestrator.provisioning_serializers import \
ProvisioningSerializer
from nailgun.test import base
class TestFaultTolerance(base.BaseTestCase):
    """Checks the data produced by ProvisioningSerializer.fault_tolerance."""

    def test_generating_fault_tolerance_data(self):
        """Only compute nodes are reported, with a percentage of 2."""
        # Six nodes; two of them combine cinder with another role.
        node_roles = (
            ['controller'],
            ['controller'],
            ['controller', 'cinder'],
            ['compute', 'cinder'],
            ['compute'],
            ['cinder'],
        )
        cluster = self.env.create(
            nodes_kwargs=[{'roles': roles} for roles in node_roles])
        cluster_db = self.db.query(Cluster).get(cluster['id'])

        # Expected uids: every node carrying the compute role, in the
        # order the cluster lists its nodes.
        compute_uids = []
        for node in cluster_db.nodes:
            if 'compute' in node.roles:
                compute_uids.append(node.uid)
        expected = [{'uids': compute_uids, 'percentage': 2}]

        actual = ProvisioningSerializer.fault_tolerance(
            cluster_db, cluster_db.nodes)
        self.assertEqual(actual, expected)