# fuel-astute/lib/astute/deployment_engine.rb

# Copyright 2013 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
module Astute
  class DeploymentEngine

    def initialize(context)
      if self.class.superclass.name == 'Object'
        raise "Instantiation of this superclass is not allowed. Please subclass from #{self.class.name}."
      end
      @ctx = context
    end
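
    # Deploy the given nodes: drop nodes that failed to provision, run
    # pre-deployment hooks, deploy node groups in priority order, then
    # finish with post-deployment hooks.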
    def deploy(deployment_info, pre_deployment=[], post_deployment=[])
      raise "Deployment info is not provided!" if deployment_info.blank?

      deployment_info, pre_deployment, post_deployment = remove_failed_nodes(deployment_info,
                                                                             pre_deployment,
                                                                             post_deployment)

      @ctx.deploy_log_parser.deploy_type = deployment_info.first['deployment_mode']
      Astute.logger.info "Deployment mode #{@ctx.deploy_log_parser.deploy_type}"

      begin
        pre_deployment_actions(deployment_info, pre_deployment)
      rescue => e
        Astute.logger.error("Unexpected error #{e.message} traceback #{e.format_backtrace}")
        raise e
      end

      failed = []
      # Sort by priority (the lower the number, the higher the priority)
      # and send the groups to deploy
      deployment_info.sort_by { |f| f['priority'] }.group_by { |f| f['priority'] }.each do |_, nodes|
        # Prevent attempts to run several deployments on a single node.
        # This is possible because one node can perform multiple roles.
        group_by_uniq_values(nodes).each do |nodes_group|
          # Prevent deploying too many nodes at once
          nodes_group.each_slice(Astute.config[:max_nodes_per_call]) do |part|
            # For each chunk, run the group deployment pipeline:
            # create links to astute.yaml
            pre_deploy_actions(part)
            # run group deployment
            deploy_piece(part)

            failed = critical_failed_nodes(part)

            # If any of the failed nodes are critical,
            # raise an error and mark all other nodes as error
            if failed.any?
              # TODO(dshulyak) maybe we should print all failed tasks for
              # these nodes, but I am not sure how that would look
              raise Astute::DeploymentEngineError, "Deployment failed on nodes #{failed.join(', ')}"
            end
          end
        end
      end

      # Post-deployment hooks
      post_deployment_actions(deployment_info, post_deployment)
    end

    protected

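    # Returns true if there are nodes to deploy; otherwise logs and returns false.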
    def validate_nodes(nodes)
      return true unless nodes.empty?

      Astute.logger.info "#{@ctx.task_id}: Nodes to deploy are not provided. Do nothing."
      false
    end

    private

    # Transform the source array of nodes into an array of node arrays,
    # where each subarray contains only nodes with unique uids.
    # Source: [
    #   {'uid' => 1, 'role' => 'cinder'},
    #   {'uid' => 2, 'role' => 'cinder'},
    #   {'uid' => 2, 'role' => 'compute'}]
    # Result: [
    #   [{'uid' => 1, 'role' => 'cinder'},
    #    {'uid' => 2, 'role' => 'cinder'}],
    #   [{'uid' => 2, 'role' => 'compute'}]]
    def group_by_uniq_values(nodes_array)
      nodes_array = deep_copy(nodes_array)
      sub_arrays = []
      until nodes_array.empty?
        batch = uniq_nodes(nodes_array)
        sub_arrays << batch
        batch.each { |e| nodes_array.slice!(nodes_array.index(e)) }
      end
      sub_arrays
    end
    def uniq_nodes(nodes_array)
      nodes_array.inject([]) { |result, node| result << node unless include_node?(result, node); result }
    end
    def include_node?(nodes_array, node)
      nodes_array.find { |n| node['uid'] == n['uid'] }
    end
    def nodes_status(nodes, status, data_to_merge)
      {
        'nodes' => nodes.map do |n|
          {'uid' => n['uid'], 'status' => status, 'role' => n['role']}.merge(data_to_merge)
        end
      }
    end
    def critical_failed_nodes(part)
      part.select { |n| n['fail_if_error'] }.map { |n| n['uid'] } &
        @ctx.status.select { |k, v| v == 'error' }.keys
    end
    def pre_deployment_actions(deployment_info, pre_deployment)
      raise "Should be implemented"
    end

    def pre_node_actions(part)
      raise "Should be implemented"
    end

    def pre_deploy_actions(part)
      raise "Should be implemented"
    end

    def post_deploy_actions(part)
      raise "Should be implemented"
    end

    def post_deployment_actions(deployment_info, post_deployment)
      raise "Should be implemented"
    end

    # Remove nodes which failed to provision
    def remove_failed_nodes(deployment_info, pre_deployment, post_deployment)
      uids = get_uids_from_deployment_info deployment_info
      required_nodes = deployment_info.select { |node| node["fail_if_error"] }
      required_uids = required_nodes.map { |node| node["uid"] }

      available_uids = detect_available_nodes(uids)
      offline_uids = uids - available_uids
      if offline_uids.present?
        # Set the status of every offline node to error
        nodes = offline_uids.map do |uid|
          {'uid' => uid,
           'status' => 'error',
           'error_type' => 'provision',
           # Avoid deployment reporter param validation
           'role' => 'hook',
           'error_msg' => 'Node is not ready for deployment: mcollective has not answered'
          }
        end

        @ctx.report_and_update_status('nodes' => nodes, 'error' => 'Node is not ready for deployment')

        # Check that all required nodes are online; if not, raise an error
        missing_required = required_uids - available_uids
        if missing_required.present?
          error_message = "Critical nodes are not available for deployment: #{missing_required}"
          raise Astute::DeploymentEngineError, error_message
        end
      end

      remove_offline_nodes(
        uids,
        available_uids,
        pre_deployment,
        deployment_info,
        post_deployment,
        offline_uids)
    end
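
    # Filter offline nodes out of deployment_info and out of the uid lists
    # of pre/post deployment tasks; drop any task left with no nodes to run on.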
    def remove_offline_nodes(uids, available_uids, pre_deployment, deployment_info, post_deployment, offline_uids)
      if offline_uids.blank?
        return [deployment_info, pre_deployment, post_deployment]
      end

      Astute.logger.info "Removing nodes which failed to provision: #{offline_uids}"
      deployment_info = cleanup_nodes_block(deployment_info, offline_uids)
      deployment_info = deployment_info.select { |node| available_uids.include? node['uid'] }

      available_uids += ["master"]
      pre_deployment.each do |task|
        task['uids'] = task['uids'].select { |uid| available_uids.include? uid }
      end
      post_deployment.each do |task|
        task['uids'] = task['uids'].select { |uid| available_uids.include? uid }
      end

      [pre_deployment, post_deployment].each do |deployment_task|
        deployment_task.select! do |task|
          if task['uids'].present?
            true
          else
            Astute.logger.info "Task (hook) was deleted because no node " \
              "is left for it to run on\n#{task.to_yaml}"
            false
          end
        end
      end

      [deployment_info, pre_deployment, post_deployment]
    end
    def cleanup_nodes_block(deployment_info, offline_uids)
      return deployment_info if offline_uids.blank?

      nodes = deployment_info.first['nodes']

      # When deploying into an already existing cluster, the 'nodes' block
      # contains all cluster nodes. We should remove only the missing nodes
      # rather than keep only the available ones.
      # Example: deploy 3 nodes, then deploy 2 more. If 1 of the 2 new nodes
      # is missing, the 'nodes' block should still contain 4 nodes.
      nodes_without_missing = nodes.select { |node| !offline_uids.include?(node['uid']) }
      deployment_info.each { |node| node['nodes'] = nodes_without_missing }
      deployment_info
    end
    def detect_available_nodes(uids)
      all_uids = uids.clone
      available_uids = []

      # With a large number of nodes, retry several times
      # to be sure about each node's status
      Astute.config[:mc_retries].times.each do
        systemtype = Astute::MClient.new(@ctx, "systemtype", all_uids, check_result=false, 10)
        available_nodes = systemtype.get_type.select do |node|
          node.results[:data][:node_type].chomp == "target"
        end

        available_uids += available_nodes.map { |node| node.results[:sender] }
        all_uids -= available_uids
        break if all_uids.empty?

        sleep Astute.config[:mc_retry_interval]
      end

      available_uids
    end
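
    # Collect uids both from the top level of deployment_info and from the
    # nested 'nodes' blocks, deduplicated.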
    def get_uids_from_deployment_info(deployment_info)
      top_level_uids = deployment_info.map { |node| node["uid"] }

      inside_uids = deployment_info.inject([]) do |uids, node|
        uids += node.fetch('nodes', []).map { |n| n['uid'] }
      end
      top_level_uids | inside_uids
    end
  end
end