Implement check node counts as a custom action

Adds a custom action to check the statistics of the hypervisors and
check the node counts of the stack.

Change-Id: I4a0576801ffbf8d29135966d7181f277ad2f8db0
Partial-Bug: #1638697
This commit is contained in:
Brad P. Crochet 2017-02-06 11:00:37 -05:00
parent 65a1e19a6d
commit d4517cd9cd
5 changed files with 379 additions and 0 deletions

View File

@ -0,0 +1,4 @@
---
features:
- Adds an action and a workflow that check the requested node counts
  against the available baremetal nodes and the hypervisor statistics.

View File

@ -98,6 +98,7 @@ mistral.actions =
tripleo.validations.check_boot_images = tripleo_common.actions.validations:CheckBootImagesAction
tripleo.validations.check_flavors = tripleo_common.actions.validations:CheckFlavorsAction
tripleo.validations.check_node_boot_configuration = tripleo_common.actions.validations:CheckNodeBootConfigurationAction
tripleo.validations.check_nodes_count = tripleo_common.actions.validations:CheckNodesCountAction
tripleo.validations.get_pubkey = tripleo_common.actions.validations:GetPubkeyAction
tripleo.validations.enabled = tripleo_common.actions.validations:Enabled
tripleo.validations.list_groups = tripleo_common.actions.validations:ListGroupsAction

View File

@ -384,3 +384,76 @@ class VerifyProfilesAction(base.TripleOAction):
"""Get node capabilities."""
return nodeutils.capabilities_to_dict(
node['properties'].get('capabilities'))
class CheckNodesCountAction(base.TripleOAction):
    """Validate hypervisor statistics and baremetal node counts.

    Sums the node count requested for every role listed in
    ``default_role_counts`` and compares it with the number of nodes passed
    in as associated plus available. It also checks that the hypervisor
    count reported in ``statistics`` is not lower than that number of nodes.

    :param statistics: hypervisor statistics; only the ``count`` key is read
    :param stack: existing stack whose ``parameters`` mapping is consulted
        for the current role counts, or a falsy value when there is no stack
    :param associated_nodes: nodes already in use; only their number is used
    :param available_nodes: nodes that can still be used; only their number
        is used
    :param parameters: mapping of role count parameter name to requested
        count; takes precedence over the stack values and the defaults
    :param default_role_counts: mapping of role count parameter name to the
        count used when neither ``parameters`` nor the stack provide one
    """

    # TODO(bcrochet): The validation actions are temporary. This logic should
    #                 move to the tripleo-validations project eventually.

    def __init__(self, statistics, stack, associated_nodes, available_nodes,
                 parameters, default_role_counts):
        super(CheckNodesCountAction, self).__init__()
        self.statistics = statistics
        self.stack = stack
        self.associated_nodes = associated_nodes
        self.available_nodes = available_nodes
        self.parameters = parameters
        self.default_role_counts = default_role_counts

    def _requested_count(self):
        """Return the total number of nodes requested across all roles."""
        requested_count = 0
        for param, default in self.default_role_counts.items():
            current = default
            if self.stack:
                try:
                    current = int(self.stack['parameters'][param])
                except KeyError:
                    # We could be adding a new role on stack-update, so
                    # there's no assumption the parameter exists in the
                    # stack; keep the default in that case.
                    pass
            # Coerce to int so that a count supplied as a numeric string is
            # summed like the stack values above instead of raising
            # TypeError on the addition.
            requested_count += int(self.parameters.get(param, current))
        return requested_count

    def run(self):
        """Run the validation.

        :returns: a mistral Result carrying a dict with ``errors``,
            ``warnings`` and ``result`` keys; it is passed as ``error`` when
            at least one check failed and as ``data`` otherwise
        """
        errors = []
        warnings = []

        requested_count = self._requested_count()

        # We get number of nodes usable for the stack by getting already
        # used (associated) nodes and number of nodes which can be used
        # (not in maintenance mode).
        # Assumption is that associated nodes are part of the stack (only
        # one overcloud is supported).
        available_count = (len(self.associated_nodes) +
                           len(self.available_nodes))

        if requested_count > available_count:
            errors.append('Not enough baremetal nodes - available: %d, '
                          'requested: %d' %
                          (available_count, requested_count))

        hypervisor_count = self.statistics['count']
        if hypervisor_count < available_count:
            errors.append('Only %d nodes are exposed to Nova of %d requests. '
                          'Check that enough nodes are in "available" state '
                          'with maintenance mode off.' %
                          (hypervisor_count, available_count))

        return_value = {
            'errors': errors,
            'warnings': warnings,
            'result': {
                'statistics': self.statistics,
                'enough_nodes': not errors,
                'requested_count': requested_count,
                'available_count': available_count,
            }
        }

        if errors:
            return mistral_workflow_utils.Result(error=return_value)
        return mistral_workflow_utils.Result(data=return_value)

View File

@ -656,3 +656,183 @@ class TestVerifyProfilesAction(base.TestCase):
]
})
self._test(expected)
class TestCheckNodesCountAction(base.TestCase):
    """Unit tests for validations.CheckNodesCountAction.

    The default fixture supplies two associated nodes, one available node
    and a hypervisor count of three, with one controller and one compute
    requested by the role defaults.
    """

    def setUp(self):
        super(TestCheckNodesCountAction, self).setUp()
        self.defaults = {
            'ControllerCount': 1,
            'ComputeCount': 1,
            'ObjectStorageCount': 0,
            'BlockStorageCount': 0,
            'CephStorageCount': 0,
        }
        self.stack = None
        self.action_args = {
            'stack': None,
            'associated_nodes': self._ironic_node_list(True, False),
            'available_nodes': self._ironic_node_list(False, True),
            'parameters': {},
            'default_role_counts': self.defaults,
            'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1},
        }

    def _ironic_node_list(self, associated, maintenance):
        """Return a stand-in node list; the action only uses its length.

        Yields two entries when ``associated`` is true, one entry when only
        ``maintenance`` is true, and an empty list otherwise (previously the
        last case raised UnboundLocalError).
        """
        if associated:
            return list(range(2))
        if maintenance:
            return list(range(1))
        return []

    def _run_action(self, **overrides):
        """Run the action with the default arguments plus ``overrides``."""
        action_args = self.action_args.copy()
        action_args.update(overrides)
        action = validations.CheckNodesCountAction(**action_args)
        return action.run()

    def _stack_with_defaults(self):
        """Return a stack whose parameters are a copy of the defaults."""
        return {'parameters': self.defaults.copy()}

    def test_run_check_hypervisor_stats(self):
        """Default arguments pass: two requested, three available."""
        result = self._run_action()

        expected = mistral_workflow_utils.Result(
            data={
                'result': {
                    'requested_count': 2,
                    'available_count': 3,
                    'statistics': {'count': 3, 'vcpus': 1, 'memory_mb': 1},
                    'enough_nodes': True
                },
                'errors': [],
                'warnings': [],
            })
        self.assertEqual(expected, result)

    def test_run_check_hypervisor_stats_not_met(self):
        """A hypervisor count below the node count is reported as error."""
        statistics = {'count': 0, 'memory_mb': 0, 'vcpus': 0}

        result = self._run_action(statistics=statistics)

        expected = mistral_workflow_utils.Result(
            error={
                'errors': [
                    'Only 0 nodes are exposed to Nova of 3 requests. Check '
                    'that enough nodes are in "available" state with '
                    'maintenance mode off.'],
                'warnings': [],
                'result': {
                    'statistics': statistics,
                    'enough_nodes': False,
                    'requested_count': 2,
                    'available_count': 3,
                }
            })
        self.assertEqual(expected, result)

    def test_check_nodes_count_deploy_enough_nodes(self):
        """Without a stack, requesting exactly the available count passes."""
        result = self._run_action(parameters={'ControllerCount': 2})

        expected = mistral_workflow_utils.Result(
            data={
                'errors': [],
                'warnings': [],
                'result': {
                    'enough_nodes': True,
                    'requested_count': 3,
                    'available_count': 3,
                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
                }
            })
        self.assertEqual(expected, result)

    def test_check_nodes_count_deploy_too_much(self):
        """Without a stack, requesting more than available fails."""
        result = self._run_action(parameters={'ControllerCount': 3})

        expected = mistral_workflow_utils.Result(
            error={
                'errors': [
                    "Not enough baremetal nodes - available: 3, requested: 4"],
                'warnings': [],
                'result': {
                    'enough_nodes': False,
                    'requested_count': 4,
                    'available_count': 3,
                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
                }
            })
        self.assertEqual(expected, result)

    def test_check_nodes_count_scale_enough_nodes(self):
        """With a stack, scaling up to the available count passes."""
        result = self._run_action(
            parameters={'ControllerCount': 2},
            stack=self._stack_with_defaults())

        expected = mistral_workflow_utils.Result(
            data={
                'errors': [],
                'warnings': [],
                'result': {
                    'enough_nodes': True,
                    'requested_count': 3,
                    'available_count': 3,
                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
                },
            })
        self.assertEqual(expected, result)

    def test_check_nodes_count_scale_too_much(self):
        """With a stack, scaling beyond the available count fails."""
        result = self._run_action(
            parameters={'ControllerCount': 3},
            stack=self._stack_with_defaults())

        expected = mistral_workflow_utils.Result(
            error={
                'errors': [
                    'Not enough baremetal nodes - available: 3, requested: 4'],
                'warnings': [],
                'result': {
                    'enough_nodes': False,
                    'requested_count': 4,
                    'available_count': 3,
                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
                }
            })
        self.assertEqual(expected, result)

    def test_check_default_param_not_in_stack(self):
        """A role parameter missing from the stack falls back to default."""
        missing_param = 'CephStorageCount'
        stack = self._stack_with_defaults()
        del stack['parameters'][missing_param]

        result = self._run_action(
            parameters={'ControllerCount': 3},
            stack=stack)

        expected = mistral_workflow_utils.Result(
            error={
                'errors': [
                    'Not enough baremetal nodes - available: 3, requested: 4'],
                'warnings': [],
                'result': {
                    'enough_nodes': False,
                    'requested_count': 4,
                    'available_count': 3,
                    'statistics': {'count': 3, 'memory_mb': 1, 'vcpus': 1}
                }
            })
        self.assertEqual(expected, result)

View File

@ -517,3 +517,124 @@ workflows:
warnings: <% $.warnings %>
on-success:
- fail: <% $.get('status') = "FAILED" %>
check_default_nodes_count:
  # Gathers hypervisor statistics, the stack and the ironic node lists,
  # feeds them to tripleo.validations.check_nodes_count and posts the
  # outcome to a zaqar queue.

  input:
    - stack_id: overcloud
    - parameters: {}
    - default_role_counts: {}
    - run_validations: true
    - queue_name: tripleo

  output:
    statistics: <% $.statistics %>
    errors: <% $.errors %>
    warnings: <% $.warnings %>

  tasks:

    # Entry point: run the checks, or go straight to the message when
    # run_validations is false.
    check_run_validations:
      on-complete:
        - get_hypervisor_statistics: <% $.run_validations %>
        - send_message: <% not $.run_validations %>

    get_hypervisor_statistics:
      action: nova.hypervisors_statistics
      publish:
        statistics: <% task(get_hypervisor_statistics).result %>
      on-success: get_stack
      on-error: fail_get_hypervisor_statistics

    fail_get_hypervisor_statistics:
      publish:
        status: FAILED
        message: <% task(get_hypervisor_statistics).result %>
        errors: []
        warnings: []
        statistics: null
      on-success: send_message

    # A failed stack lookup does not stop the workflow: on-complete moves
    # on either way and the stack is published as null on error.
    get_stack:
      action: heat.stacks_get
      input:
        stack_id: <% $.stack_id %>
      publish:
        stack: <% task(get_stack).result %>
      publish-on-error:
        stack: null
      on-complete: get_associated_nodes

    get_associated_nodes:
      action: ironic.node_list
      input:
        associated: true
      publish:
        associated_nodes: <% task(get_associated_nodes).result %>
      on-success: get_available_nodes
      on-error: fail_get_associated_nodes

    fail_get_associated_nodes:
      publish:
        status: FAILED
        message: <% task(get_associated_nodes).result %>
        errors: []
        warnings: []
      on-success: send_message

    # Nodes that are neither associated nor in maintenance mode.
    get_available_nodes:
      action: ironic.node_list
      input:
        associated: false
        maintenance: false
      publish:
        available_nodes: <% task(get_available_nodes).result %>
      on-success: check_nodes_count
      on-error: fail_get_available_nodes

    fail_get_available_nodes:
      publish:
        status: FAILED
        message: <% task(get_available_nodes).result %>
        errors: []
        warnings: []
      on-success: send_message

    check_nodes_count:
      action: tripleo.validations.check_nodes_count
      input:
        statistics: <% $.statistics %>
        stack: <% $.stack %>
        associated_nodes: <% $.associated_nodes %>
        available_nodes: <% $.available_nodes %>
        parameters: <% $.parameters %>
        default_role_counts: <% $.default_role_counts %>
      publish:
        errors: <% task(check_nodes_count).result.errors %>
        warnings: <% task(check_nodes_count).result.warnings %>
      on-success: send_message
      on-error: fail_check_nodes_count

    fail_check_nodes_count:
      publish:
        status: FAILED
        message: <% task(check_nodes_count).result %>
        statistics: null
        errors: <% task(check_nodes_count).result.errors %>
        warnings: <% task(check_nodes_count).result.warnings %>
      on-success: send_message

    # Every path ends here; status defaults to SUCCESS unless one of the
    # fail_* tasks published FAILED, in which case the workflow is failed
    # after the message has been posted.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.validations.v1.check_hypervisor_stats
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              statistics: <% $.statistics %>
              errors: <% $.errors %>
              warnings: <% $.warnings %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>