Move provisioning part to separate file

Blueprint: 200-nodes-support
Change-Id: I2cce937ce5c5a4cd3d91c87b8d7eaaa825c6c224
This commit is contained in:
Łukasz Oleś 2015-01-26 13:24:43 +01:00
parent 7f5aaa112a
commit 3a39ae300b
5 changed files with 1039 additions and 911 deletions

View File

@ -30,6 +30,7 @@ require 'astute/orchestrator'
require 'astute/deployment_engine'
require 'astute/network'
require 'astute/puppetd'
require 'astute/provision'
require 'astute/deployment_engine/nailyfact'
require 'astute/deployment_engine/tasklib'
require 'astute/deployment_engine/granular_deployment'

View File

@ -20,15 +20,8 @@ module Astute
end
def node_type(reporter, task_id, nodes_uids, timeout=nil)
context = Context.new(task_id, reporter)
systemtype = MClient.new(context, "systemtype", nodes_uids, check_result=false, timeout)
systems = systemtype.get_type
systems.map do |n|
{
'uid' => n.results[:sender],
'node_type' => n.results[:data][:node_type].chomp
}
end
provisioner = Provisioner.new(@log_parsing)
provisioner.node_type(reporter, task_id, nodes_uids, timeout)
end
def execute_tasks(up_reporter, task_id, tasks)
@ -71,178 +64,13 @@ module Astute
end
def provision(reporter, task_id, engine_attrs, nodes)
raise "Nodes to provision are not provided!" if nodes.empty?
provision_method = engine_attrs['provision_method'] || 'cobbler'
cobbler = CobblerManager.new(engine_attrs, reporter)
begin
remove_nodes(
reporter,
task_id,
engine_attrs,
nodes,
reboot=false,
raise_if_error=true
)
cobbler.add_nodes(nodes)
# if provision_method is 'image', we do not need to immediately
# reboot nodes. instead, we need to run image based provisioning
# process and then reboot nodes
# TODO(kozhukalov): do not forget about execute_shell_command timeout which is 3600
# watch_provision_progress has provisioning_timeout + 3600 is much longer than provisioning_timeout
if provision_method == 'image'
# disabling pxe boot
cobbler.netboot_nodes(nodes, false)
image_provision(reporter, task_id, nodes)
end
# TODO(vsharshov): maybe we should reboot nodes using mco or ssh instead of Cobbler
reboot_events = cobbler.reboot_nodes(nodes)
failed_nodes = cobbler.check_reboot_nodes(reboot_events)
# control reboot for nodes which still in bootstrap state
# Note: if the image based provisioning is used nodes are already
# provisioned and rebooting is not necessary. In fact the forced
# reboot can corrupt a node if it manages to reboot fast enough
# (see LP #1394599)
# XXX: actually there's a tiny probability to reboot a node being
# provisioned in a traditional way (by Debian installer or anaconda),
# however such a double reboot is not dangerous since cobbler will
# boot such a node into installer once again.
if provision_method != 'image'
control_reboot_using_ssh(reporter, task_id, nodes)
end
rescue => e
Astute.logger.error("Error occured while provisioning: #{e.inspect}")
reporter.report({
'status' => 'error',
'error' => e.message,
'progress' => 100})
unlock_nodes_discovery(reporter, task_id, nodes.map {|n| n['slave_name']}, nodes)
raise e
end
if failed_nodes.present?
err_msg = "Nodes failed to reboot: #{failed_nodes.inspect}"
Astute.logger.error(err_msg)
reporter.report({
'status' => 'error',
'error' => err_msg,
'progress' => 100})
unlock_nodes_discovery(reporter, task_id="", failed_nodes, nodes)
raise FailedToRebootNodesError.new(err_msg)
end
watch_provision_progress(reporter, task_id, nodes)
end
def image_provision(reporter, task_id, nodes)
failed_uids_provis = ImageProvision.provision(Context.new(task_id, reporter), nodes)
if failed_uids_provis.empty?
reporter.report({
'status' => 'provisioning',
'progress' => 80,
'msg' => 'Nodes have beed successfully provisioned. Next step is reboot.'
})
else
err_msg = 'At least one of nodes have failed during provisioning'
Astute.logger.error("#{task_id}: #{err_msg}")
reporter.report({
'status' => 'error',
'progress' => 100,
'msg' => err_msg,
'error_type' => 'provision'
})
raise FailedImageProvisionError.new(err_msg)
end
end
def watch_provision_progress(reporter, task_id, nodes)
raise "Nodes to provision are not provided!" if nodes.empty?
provision_log_parser = @log_parsing ? LogParser::ParseProvisionLogs.new : LogParser::NoParsing.new
proxy_reporter = ProxyReporter::DeploymentProxyReporter.new(reporter)
prepare_logs_for_parsing(provision_log_parser, nodes)
nodes_not_booted = nodes.map{ |n| n['uid'] }
result_msg = {'nodes' => []}
begin
Timeout.timeout(Astute.config.provisioning_timeout) do # Timeout for booting target OS
catch :done do
loop do
sleep_not_greater_than(20) do
nodes_types = node_type(proxy_reporter, task_id, nodes.map {|n| n['uid']}, 5)
target_uids, nodes_not_booted, reject_uids = analize_node_types(nodes_types, nodes_not_booted)
if reject_uids.present?
ctx ||= Context.new(task_id, proxy_reporter)
reject_nodes = reject_uids.map { |uid| {'uid' => uid } }
NodesRemover.new(ctx, reject_nodes, reboot=true).remove
end
if nodes_not_booted.empty?
Astute.logger.info "All nodes are provisioned"
throw :done
end
Astute.logger.debug("Still provisioning follow nodes: #{nodes_not_booted}")
report_about_progress(proxy_reporter, provision_log_parser, target_uids, nodes)
end
end
end
# We are here if jumped by throw from while cycle
end
rescue Timeout::Error
Astute.logger.error("Timeout of provisioning is exceeded. Nodes not booted: #{nodes_not_booted}")
nodes_progress = nodes_not_booted.map do |n|
{
'uid' => n,
'status' => 'error',
'error_msg' => "Timeout of provisioning is exceeded",
'progress' => 100,
'error_type' => 'provision'
}
end
result_msg.merge!({
'status' => 'error',
'error' => 'Timeout of provisioning is exceeded',
'progress' => 100})
result_msg['nodes'] += nodes_progress
end
node_uids = nodes.map { |n| n['uid'] }
(node_uids - nodes_not_booted).each do |uid|
result_msg['nodes'] << {'uid' => uid, 'progress' => 100, 'status' => 'provisioned'}
end
# If there was no errors, then set status to ready
result_msg.reverse_merge!({'status' => 'ready', 'progress' => 100})
proxy_reporter.report(result_msg)
result_msg
provisioner = Provisioner.new(@log_parsing)
provisioner.provision(reporter, task_id, engine_attrs, nodes)
end
def remove_nodes(reporter, task_id, engine_attrs, nodes, reboot=true, raise_if_error=false)
cobbler = CobblerManager.new(engine_attrs, reporter)
cobbler.remove_nodes(nodes)
ctx = Context.new(task_id, reporter)
result = NodesRemover.new(ctx, nodes, reboot).remove
if (result['error_nodes'] || result['inaccessible_nodes']) && raise_if_error
bad_node_ids = result.fetch('error_nodes', []) +
result.fetch('inaccessible_nodes', [])
raise "Mcollective problem with nodes #{bad_node_ids}, please check log for details"
end
Rsyslogd.send_sighup(ctx, engine_attrs["master_ip"])
result
provisioner = Provisioner.new(@log_parsing)
provisioner.remove_nodes(reporter, task_id, engine_attrs, nodes, reboot, raise_if_error)
end
def stop_puppet_deploy(reporter, task_id, nodes)
@ -252,21 +80,8 @@ module Astute
end
def stop_provision(reporter, task_id, engine_attrs, nodes)
ctx = Context.new(task_id, reporter)
ssh_result = stop_provision_via_ssh(ctx, nodes, engine_attrs)
# Remove already provisioned node. Possible erasing nodes twice
provisioned_nodes, mco_result = stop_provision_via_mcollective(ctx, nodes)
# For nodes responded via mcollective use mcollective result instead of ssh
['nodes', 'error_nodes', 'inaccessible_nodes'].each do |node_status|
ssh_result[node_status] = ssh_result.fetch(node_status, []) - provisioned_nodes
end
result = merge_rm_nodes_result(ssh_result, mco_result)
result['status'] = 'error' if result['error_nodes'].present?
result
provisioner = Provisioner.new(@log_parsing)
provisioner.stop_provision(reporter, task_id, engine_attrs, nodes)
end
def dump_environment(reporter, task_id, settings)
@ -293,15 +108,6 @@ module Astute
private
def validate_nodes_access(ctx, nodes)
nodes_types = node_type(ctx.reporter, ctx.task_id, nodes.map{ |n| n['uid'] }, timeout=10)
not_avaliable_nodes = nodes.map { |n| n['uid'].to_s } - nodes_types.map { |n| n['uid'].to_s }
unless not_avaliable_nodes.empty?
raise "Network verification not avaliable because nodes #{not_avaliable_nodes} " \
"not avaliable via mcollective"
end
end
def deploy_cluster(up_reporter, task_id, deployment_info, deploy_engine, pre_deployment, post_deployment)
proxy_reporter = ProxyReporter::DeploymentProxyReporter.new(up_reporter, deployment_info)
log_parser = @log_parsing ? LogParser::ParseDeployLogs.new : LogParser::NoParsing.new
@ -322,136 +128,14 @@ module Astute
reporter.report(status)
end
def prepare_logs_for_parsing(provision_log_parser, nodes)
sleep_not_greater_than(10) do # Wait while nodes going to reboot
Astute.logger.info "Starting OS provisioning for nodes: #{nodes.map{ |n| n['uid'] }.join(',')}"
begin
provision_log_parser.prepare(nodes)
rescue => e
Astute.logger.warn "Some error occurred when prepare LogParser: #{e.message}, trace: #{e.format_backtrace}"
end
def validate_nodes_access(ctx, nodes)
nodes_types = node_type(ctx.reporter, ctx.task_id, nodes.map{ |n| n['uid'] }, timeout=10)
not_avaliable_nodes = nodes.map { |n| n['uid'].to_s } - nodes_types.map { |n| n['uid'].to_s }
unless not_avaliable_nodes.empty?
raise "Network verification not avaliable because nodes #{not_avaliable_nodes} " \
"not avaliable via mcollective"
end
end
def analize_node_types(types, nodes_not_booted)
types.each { |t| Astute.logger.debug("Got node types: uid=#{t['uid']} type=#{t['node_type']}") }
target_uids = types.reject{ |n| n['node_type'] != 'target' }.map{ |n| n['uid'] }
reject_uids = types.reject{ |n| n['node_type'] == 'target' }.map{ |n| n['uid'] }
Astute.logger.debug("Not target nodes will be rejected: #{reject_uids.join(',')}")
nodes_not_booted -= target_uids
Astute.logger.debug "Not provisioned: #{nodes_not_booted.join(',')}, " \
"got target OSes: #{target_uids.join(',')}"
return target_uids, nodes_not_booted, reject_uids
end
def sleep_not_greater_than(sleep_time, &block)
time = Time.now.to_f
block.call
time = time + sleep_time - Time.now.to_f
sleep(time) if time > 0
end
def report_about_progress(reporter, provision_log_parser, target_uids, nodes)
begin
nodes_progress = provision_log_parser.progress_calculate(nodes.map{ |n| n['uid'] }, nodes)
nodes_progress.each do |n|
if target_uids.include?(n['uid'])
n['progress'] = 100
n['status'] = 'provisioned'
else
n['status'] = 'provisioning'
end
end
reporter.report({'nodes' => nodes_progress})
rescue => e
Astute.logger.warn "Some error occurred when parse logs for nodes progress: #{e.message}, trace: #{e.format_backtrace}"
end
end
def stop_provision_via_mcollective(ctx, nodes)
return [], {} if nodes.empty?
mco_result = {}
nodes_uids = nodes.map{ |n| n['uid'] }
Astute.config.mc_retries.times do |i|
sleep Astute.config.nodes_remove_interval
Astute.logger.debug "Trying to connect to nodes #{nodes_uids} using mcollective"
nodes_types = node_type(ctx.reporter, ctx.task_id, nodes_uids, 2)
next if nodes_types.empty?
provisioned = nodes_types.select{ |n| ['target', 'bootstrap'].include? n['node_type'] }
.map{ |n| {'uid' => n['uid']} }
current_mco_result = NodesRemover.new(ctx, provisioned, reboot=true).remove
Astute.logger.debug "Retry result #{i}: "\
"mco success nodes: #{current_mco_result['nodes']}, "\
"mco error nodes: #{current_mco_result['error_nodes']}, "\
"mco inaccessible nodes: #{current_mco_result['inaccessible_nodes']}"
mco_result = merge_rm_nodes_result(mco_result, current_mco_result)
nodes_uids -= provisioned.map{ |n| n['uid'] }
break if nodes_uids.empty?
end
provisioned_nodes = nodes.map{ |n| {'uid' => n['uid']} } - nodes_uids.map {|n| {'uid' => n} }
Astute.logger.debug "MCO final result: "\
"mco success nodes: #{mco_result['nodes']}, "\
"mco error nodes: #{mco_result['error_nodes']}, "\
"mco inaccessible nodes: #{mco_result['inaccessible_nodes']}, "\
"all mco nodes: #{provisioned_nodes}"
return provisioned_nodes, mco_result
end
def stop_provision_via_ssh(ctx, nodes, engine_attrs)
ssh_result = Ssh.execute(ctx, nodes, SshEraseNodes.command)
CobblerManager.new(engine_attrs, ctx.reporter).remove_nodes(nodes)
Ssh.execute(ctx,
nodes,
SshHardReboot.command,
timeout=5,
retries=1)
ssh_result
end
def merge_rm_nodes_result(res1, res2)
['nodes', 'error_nodes', 'inaccessible_nodes'].inject({}) do |result, node_status|
result[node_status] = (res1.fetch(node_status, []) + res2.fetch(node_status, [])).uniq
result
end
end
def unlock_nodes_discovery(reporter, task_id="", failed_nodes, nodes)
nodes_uids = nodes.select{ |n| failed_nodes.include?(n['slave_name']) }
.map{ |n| n['uid'] }
shell = MClient.new(Context.new(task_id, reporter),
'execute_shell_command',
nodes_uids,
check_result=false,
timeout=2)
mco_result = shell.execute(:cmd => 'rm -f /var/run/nodiscover')
result = mco_result.map do |n|
{
'uid' => n.results[:sender],
'exit code' => n.results[:data][:exit_code]
}
end
Astute.logger.debug "Unlock discovery for failed nodes. Result: #{result}"
end
def control_reboot_using_ssh(reporter, task_id="", nodes)
ctx = Context.new(task_id, reporter)
nodes.each { |n| n['admin_ip'] = n['power_address'] }
Ssh.execute(ctx,
nodes,
SshHardReboot.command,
timeout=5,
retries=1)
end
end # class
end # module

361
lib/astute/provision.rb Normal file
View File

@ -0,0 +1,361 @@
# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
module Astute
class Provisioner
# Provisioner encapsulates OS provisioning operations (extracted from
# Orchestrator): node type discovery, Cobbler-driven provisioning,
# node removal and stopping an in-flight provisioning run.
#
# @param log_parsing [Boolean] when true, provisioning logs are parsed to
#   produce progress reports; otherwise a no-op parser is used.
def initialize(log_parsing=false)
@log_parsing = log_parsing
end
# Ask the "systemtype" MCollective agent what type each node currently
# reports (e.g. 'bootstrap' or 'target').
#
# @param nodes_uids [Array] uids of the nodes to query
# @param timeout [Numeric, nil] mcollective timeout; nil uses the default
# @return [Array<Hash>] one {'uid' => ..., 'node_type' => ...} per reply;
#   nodes that did not answer are simply absent from the result
def node_type(reporter, task_id, nodes_uids, timeout=nil)
  ctx = Context.new(task_id, reporter)
  agent = MClient.new(ctx, "systemtype", nodes_uids, check_result=false, timeout)
  agent.get_type.map do |reply|
    {
      'uid' => reply.results[:sender],
      'node_type' => reply.results[:data][:node_type].chomp
    }
  end
end
# Provision an OS onto the given nodes.
#
# Flow: erase nodes (without reboot), register them in Cobbler, optionally
# run image-based provisioning, reboot through Cobbler, then watch progress
# until every node boots into the target OS.
#
# @param reporter     object receiving progress/status reports
# @param task_id      [String] id used for Context/log correlation
# @param engine_attrs [Hash] Cobbler settings; 'provision_method' selects
#   'image' or the default 'cobbler' (installer-based) flow
# @param nodes        [Array<Hash>] nodes with 'uid', 'slave_name', etc.
# @raise [RuntimeError] if the nodes list is empty
# @raise [FailedToRebootNodesError] if some nodes failed to reboot
def provision(reporter, task_id, engine_attrs, nodes)
  raise "Nodes to provision are not provided!" if nodes.empty?
  provision_method = engine_attrs['provision_method'] || 'cobbler'
  cobbler = CobblerManager.new(engine_attrs, reporter)
  begin
    # Erase nodes first (no reboot) so no stale state survives; fail hard
    # if erasing does not succeed.
    remove_nodes(
      reporter,
      task_id,
      engine_attrs,
      nodes,
      reboot=false,
      raise_if_error=true
    )
    cobbler.add_nodes(nodes)
    # if provision_method is 'image', we do not need to immediately
    # reboot nodes. instead, we need to run image based provisioning
    # process and then reboot nodes
    # TODO(kozhukalov): do not forget about execute_shell_command timeout which is 3600
    # watch_provision_progress has provisioning_timeout + 3600 is much longer than provisioning_timeout
    if provision_method == 'image'
      # disabling pxe boot
      cobbler.netboot_nodes(nodes, false)
      image_provision(reporter, task_id, nodes)
    end
    # TODO(vsharshov): maybe we should reboot nodes using mco or ssh instead of Cobbler
    reboot_events = cobbler.reboot_nodes(nodes)
    failed_nodes = cobbler.check_reboot_nodes(reboot_events)
    # control reboot for nodes which still in bootstrap state
    # Note: if the image based provisioning is used nodes are already
    # provisioned and rebooting is not necessary. In fact the forced
    # reboot can corrupt a node if it manages to reboot fast enough
    # (see LP #1394599)
    # XXX: actually there's a tiny probability to reboot a node being
    # provisioned in a traditional way (by Debian installer or anaconda),
    # however such a double reboot is not dangerous since cobbler will
    # boot such a node into installer once again.
    if provision_method != 'image'
      control_reboot_using_ssh(reporter, task_id, nodes)
    end
  rescue => e
    Astute.logger.error("Error occured while provisioning: #{e.inspect}")
    reporter.report({
      'status' => 'error',
      'error' => e.message,
      'progress' => 100})
    unlock_nodes_discovery(reporter, task_id, nodes.map {|n| n['slave_name']}, nodes)
    raise e
  end
  if failed_nodes.present?
    err_msg = "Nodes failed to reboot: #{failed_nodes.inspect}"
    Astute.logger.error(err_msg)
    reporter.report({
      'status' => 'error',
      'error' => err_msg,
      'progress' => 100})
    # Fix: pass the real task_id through. The previous call used
    # `task_id=""` in the argument list, which assigns "" to the local and
    # loses task correlation for the unlock operation (the rescue branch
    # above already passes task_id correctly).
    unlock_nodes_discovery(reporter, task_id, failed_nodes, nodes)
    raise FailedToRebootNodesError.new(err_msg)
  end
  watch_provision_progress(reporter, task_id, nodes)
end
# Run image-based provisioning on all nodes via the ImageProvision helper.
# On success, reports 80% progress (reboot is the remaining step); on any
# node failure, reports an error and raises FailedImageProvisionError.
def image_provision(reporter, task_id, nodes)
  failed_uids_provis = ImageProvision.provision(Context.new(task_id, reporter), nodes)
  if failed_uids_provis.empty?
    reporter.report({
      'status' => 'provisioning',
      'progress' => 80,
      # Fix: typo in the user-facing message ("beed" -> "been").
      'msg' => 'Nodes have been successfully provisioned. Next step is reboot.'
    })
  else
    err_msg = 'At least one of nodes have failed during provisioning'
    Astute.logger.error("#{task_id}: #{err_msg}")
    reporter.report({
      'status' => 'error',
      'progress' => 100,
      'msg' => err_msg,
      'error_type' => 'provision'
    })
    raise FailedImageProvisionError.new(err_msg)
  end
end
# Poll nodes until each one reports the 'target' node type (i.e. has booted
# into the provisioned OS) or Astute.config.provisioning_timeout elapses.
# Nodes reporting an unexpected type are erased and rebooted. Progress is
# reported through a deployment proxy reporter.
#
# @return [Hash] the final result message that was reported
# @raise [RuntimeError] if the nodes list is empty
def watch_provision_progress(reporter, task_id, nodes)
raise "Nodes to provision are not provided!" if nodes.empty?
provision_log_parser = @log_parsing ? LogParser::ParseProvisionLogs.new : LogParser::NoParsing.new
proxy_reporter = ProxyReporter::DeploymentProxyReporter.new(reporter)
prepare_logs_for_parsing(provision_log_parser, nodes)
nodes_not_booted = nodes.map{ |n| n['uid'] }
result_msg = {'nodes' => []}
begin
Timeout.timeout(Astute.config.provisioning_timeout) do # Timeout for booting target OS
catch :done do
loop do
# Each iteration takes at least 20 seconds (polling interval).
sleep_not_greater_than(20) do
nodes_types = node_type(proxy_reporter, task_id, nodes.map {|n| n['uid']}, 5)
# NOTE: analize_node_types reassigns nodes_not_booted, shrinking it
# as nodes come up; the rescue below relies on its final value.
target_uids, nodes_not_booted, reject_uids = analize_node_types(nodes_types, nodes_not_booted)
if reject_uids.present?
ctx ||= Context.new(task_id, proxy_reporter)
reject_nodes = reject_uids.map { |uid| {'uid' => uid } }
NodesRemover.new(ctx, reject_nodes, reboot=true).remove
end
if nodes_not_booted.empty?
Astute.logger.info "All nodes are provisioned"
throw :done
end
Astute.logger.debug("Still provisioning follow nodes: #{nodes_not_booted}")
report_about_progress(proxy_reporter, provision_log_parser, target_uids, nodes)
end
end
end
# We are here if jumped by throw from while cycle
end
rescue Timeout::Error
# On timeout, mark every still-unbooted node as a provisioning error.
Astute.logger.error("Timeout of provisioning is exceeded. Nodes not booted: #{nodes_not_booted}")
nodes_progress = nodes_not_booted.map do |n|
{
'uid' => n,
'status' => 'error',
'error_msg' => "Timeout of provisioning is exceeded",
'progress' => 100,
'error_type' => 'provision'
}
end
result_msg.merge!({
'status' => 'error',
'error' => 'Timeout of provisioning is exceeded',
'progress' => 100})
result_msg['nodes'] += nodes_progress
end
# Nodes that did boot are reported as fully provisioned.
node_uids = nodes.map { |n| n['uid'] }
(node_uids - nodes_not_booted).each do |uid|
result_msg['nodes'] << {'uid' => uid, 'progress' => 100, 'status' => 'provisioned'}
end
# If there was no errors, then set status to ready
result_msg.reverse_merge!({'status' => 'ready', 'progress' => 100})
proxy_reporter.report(result_msg)
result_msg
end
# Remove nodes from Cobbler, then erase them via mcollective and notify
# rsyslogd on the master so it reopens log files.
#
# @param reboot [Boolean] whether NodesRemover should reboot nodes
# @param raise_if_error [Boolean] raise instead of returning on mco failures
# @return [Hash] the NodesRemover result
def remove_nodes(reporter, task_id, engine_attrs, nodes, reboot=true, raise_if_error=false)
  CobblerManager.new(engine_attrs, reporter).remove_nodes(nodes)
  ctx = Context.new(task_id, reporter)
  result = NodesRemover.new(ctx, nodes, reboot).remove
  if raise_if_error && (result['error_nodes'] || result['inaccessible_nodes'])
    bad_node_ids = result.fetch('error_nodes', []) +
      result.fetch('inaccessible_nodes', [])
    raise "Mcollective problem with nodes #{bad_node_ids}, please check log for details"
  end
  Rsyslogd.send_sighup(ctx, engine_attrs["master_ip"])
  result
end
# Abort an in-flight provisioning run: erase nodes over ssh first, then
# erase already-provisioned nodes via mcollective, and merge both results.
#
# @return [Hash] merged removal result; 'status' => 'error' when any node failed
def stop_provision(reporter, task_id, engine_attrs, nodes)
  ctx = Context.new(task_id, reporter)
  ssh_result = stop_provision_via_ssh(ctx, nodes, engine_attrs)
  # Remove already provisioned node. Possible erasing nodes twice
  provisioned_nodes, mco_result = stop_provision_via_mcollective(ctx, nodes)
  # Prefer the mcollective result for nodes that answered via mcollective.
  %w(nodes error_nodes inaccessible_nodes).each do |node_status|
    ssh_result[node_status] = ssh_result.fetch(node_status, []) - provisioned_nodes
  end
  merged = merge_rm_nodes_result(ssh_result, mco_result)
  merged['status'] = 'error' if merged['error_nodes'].present?
  merged
end
private
# Wait briefly for nodes to begin rebooting, then prime the log parser for
# the given nodes. Parser preparation failures are logged and swallowed so
# provisioning itself is never interrupted by log-parsing problems.
def prepare_logs_for_parsing(provision_log_parser, nodes)
sleep_not_greater_than(10) do # Wait while nodes going to reboot
Astute.logger.info "Starting OS provisioning for nodes: #{nodes.map{ |n| n['uid'] }.join(',')}"
begin
provision_log_parser.prepare(nodes)
rescue => e
# Best-effort: progress parsing is optional, so only warn on failure.
Astute.logger.warn "Some error occurred when prepare LogParser: #{e.message}, trace: #{e.format_backtrace}"
end
end
end
# Split reported node types into nodes that booted the target OS and the
# rest (to be rejected), and shrink the not-yet-booted list accordingly.
#
# @param types [Array<Hash>] entries with 'uid' and 'node_type'
# @param nodes_not_booted [Array] uids still awaited
# @return [Array] target_uids, remaining nodes_not_booted, reject_uids
def analize_node_types(types, nodes_not_booted)
  types.each { |t| Astute.logger.debug("Got node types: uid=#{t['uid']} type=#{t['node_type']}") }
  target_uids = types.select { |n| n['node_type'] == 'target' }.map { |n| n['uid'] }
  reject_uids = types.select { |n| n['node_type'] != 'target' }.map { |n| n['uid'] }
  Astute.logger.debug("Not target nodes will be rejected: #{reject_uids.join(',')}")
  nodes_not_booted -= target_uids
  Astute.logger.debug "Not provisioned: #{nodes_not_booted.join(',')}, " \
                      "got target OSes: #{target_uids.join(',')}"
  return target_uids, nodes_not_booted, reject_uids
end
# Run the block, then sleep for whatever remains of sleep_time seconds, so
# the whole call takes roughly sleep_time (no sleep if the block was slower).
def sleep_not_greater_than(sleep_time, &block)
  started_at = Time.now.to_f
  block.call
  remaining = sleep_time - (Time.now.to_f - started_at)
  sleep(remaining) if remaining > 0
end
# Compute per-node provisioning progress from logs and report it. Nodes
# already running the target OS are forced to 100% / 'provisioned'.
# Parsing errors are logged and swallowed — progress reporting is best-effort.
def report_about_progress(reporter, provision_log_parser, target_uids, nodes)
begin
nodes_progress = provision_log_parser.progress_calculate(nodes.map{ |n| n['uid'] }, nodes)
nodes_progress.each do |n|
if target_uids.include?(n['uid'])
n['progress'] = 100
n['status'] = 'provisioned'
else
n['status'] = 'provisioning'
end
end
reporter.report({'nodes' => nodes_progress})
rescue => e
Astute.logger.warn "Some error occurred when parse logs for nodes progress: #{e.message}, trace: #{e.format_backtrace}"
end
end
# Erase, via mcollective, nodes that answer with a known type ('target' or
# 'bootstrap'). Retries up to Astute.config.mc_retries times, sleeping
# Astute.config.nodes_remove_interval between attempts, removing uids from
# the pending list as they are handled.
#
# @return [Array] provisioned_nodes ([{'uid' => ...}]) and the merged mco result hash
def stop_provision_via_mcollective(ctx, nodes)
return [], {} if nodes.empty?
mco_result = {}
nodes_uids = nodes.map{ |n| n['uid'] }
Astute.config.mc_retries.times do |i|
sleep Astute.config.nodes_remove_interval
Astute.logger.debug "Trying to connect to nodes #{nodes_uids} using mcollective"
nodes_types = node_type(ctx.reporter, ctx.task_id, nodes_uids, 2)
next if nodes_types.empty?
# Only nodes that responded with a recognizable type can be erased here.
provisioned = nodes_types.select{ |n| ['target', 'bootstrap'].include? n['node_type'] }
.map{ |n| {'uid' => n['uid']} }
current_mco_result = NodesRemover.new(ctx, provisioned, reboot=true).remove
Astute.logger.debug "Retry result #{i}: "\
"mco success nodes: #{current_mco_result['nodes']}, "\
"mco error nodes: #{current_mco_result['error_nodes']}, "\
"mco inaccessible nodes: #{current_mco_result['inaccessible_nodes']}"
mco_result = merge_rm_nodes_result(mco_result, current_mco_result)
nodes_uids -= provisioned.map{ |n| n['uid'] }
break if nodes_uids.empty?
end
# Everything not left in nodes_uids was reached (and erased) via mcollective.
provisioned_nodes = nodes.map{ |n| {'uid' => n['uid']} } - nodes_uids.map {|n| {'uid' => n} }
Astute.logger.debug "MCO final result: "\
"mco success nodes: #{mco_result['nodes']}, "\
"mco error nodes: #{mco_result['error_nodes']}, "\
"mco inaccessible nodes: #{mco_result['inaccessible_nodes']}, "\
"all mco nodes: #{provisioned_nodes}"
return provisioned_nodes, mco_result
end
# Erase nodes over ssh, drop them from Cobbler, then hard-reboot them over
# ssh. Returns the result of the erase command (the reboot result is
# intentionally discarded).
def stop_provision_via_ssh(ctx, nodes, engine_attrs)
  erase_result = Ssh.execute(ctx, nodes, SshEraseNodes.command)
  CobblerManager.new(engine_attrs, ctx.reporter).remove_nodes(nodes)
  Ssh.execute(ctx,
              nodes,
              SshHardReboot.command,
              timeout=5,
              retries=1)
  erase_result
end
# Re-enable node discovery on nodes whose provisioning failed by deleting
# the /var/run/nodiscover lock file over mcollective.
#
# @param failed_nodes [Array<String>] slave_names of failed nodes
# @param nodes [Array<Hash>] full node hashes (matched by 'slave_name')
def unlock_nodes_discovery(reporter, task_id="", failed_nodes, nodes)
  failed_uids = nodes.select { |n| failed_nodes.include?(n['slave_name']) }
                     .map { |n| n['uid'] }
  shell = MClient.new(Context.new(task_id, reporter),
                      'execute_shell_command',
                      failed_uids,
                      check_result=false,
                      timeout=2)
  replies = shell.execute(:cmd => 'rm -f /var/run/nodiscover')
  result = replies.map do |reply|
    {
      'uid' => reply.results[:sender],
      'exit code' => reply.results[:data][:exit_code]
    }
  end
  Astute.logger.debug "Unlock discovery for failed nodes. Result: #{result}"
end
# Hard-reboot nodes over ssh as insurance for nodes still sitting in
# bootstrap after the Cobbler reboot.
# NOTE: mutates the node hashes — copies 'power_address' into 'admin_ip'
# so Ssh connects via the power address.
def control_reboot_using_ssh(reporter, task_id="", nodes)
  ctx = Context.new(task_id, reporter)
  nodes.each { |node| node['admin_ip'] = node['power_address'] }
  Ssh.execute(ctx, nodes, SshHardReboot.command, timeout=5, retries=1)
end
# Merge two node-removal result hashes, de-duplicating entries inside each
# status bucket. Missing buckets are treated as empty arrays.
def merge_rm_nodes_result(res1, res2)
  merged = {}
  ['nodes', 'error_nodes', 'inaccessible_nodes'].each do |node_status|
    # Array#| concatenates and removes duplicates, preserving order —
    # equivalent to (a + b).uniq.
    merged[node_status] = res1.fetch(node_status, []) | res2.fetch(node_status, [])
  end
  merged
end
end
end

View File

@ -51,131 +51,6 @@ describe Astute::Orchestrator do
end
end
describe '#node_type' do
it "must be able to return node type" do
nodes = [{'uid' => '1'}]
res = {:data => {:node_type => 'target'},
:sender=>"1"}
mc_res = mock_mc_result(res)
mc_timeout = 5
rpcclient = mock_rpcclient(nodes, mc_timeout)
rpcclient.expects(:get_type).once.returns([mc_res])
types = @orchestrator.node_type(@reporter, 'task_uuid', nodes.map { |n| n['uid'] }, mc_timeout)
types.should eql([{"node_type"=>"target", "uid"=>"1"}])
end
end
describe '#remove_nodes' do
let(:nodes) { [{'uid' => '1', 'slave_name' => ''}] }
let(:engine_attrs) do
{
"url"=>"http://localhost/cobbler_api",
"username"=>"cobbler",
"password"=>"cobbler",
"master_ip"=>"127.0.0.1",
}
end
before(:each) do
remote = mock() do
stubs(:call)
stubs(:call).with('login', 'cobbler', 'cobbler').returns('remotetoken')
end
XMLRPC::Client = mock() do
stubs(:new).returns(remote)
end
end
it 'should use NodeRemover to remove nodes' do
Astute::NodesRemover.any_instance.expects(:remove).once.returns({})
Astute::Rsyslogd.expects(:send_sighup).once
@orchestrator.remove_nodes(@reporter, task_id="task_id", engine_attrs, nodes, reboot=true)
end
it 'should return list of nodes which removed' do
Astute::NodesRemover.any_instance.expects(:remove).once.returns({"nodes"=>[{"uid"=>"1"}]})
Astute::Rsyslogd.stubs(:send_sighup).once
expect(@orchestrator.remove_nodes(
@reporter,
task_id="task_id",
engine_attrs,
nodes,
reboot=true
)).to eql({"nodes"=>[{"uid"=>"1"}]})
end
context 'if exception in case of error enable' do
it 'should raise error if nodes removing operation via mcollective failed(error)' do
Astute::NodesRemover.any_instance.expects(:remove).once.returns({
'status' => 'error',
'error_nodes' => [{"uid"=>"1"}]
})
Astute::Rsyslogd.stubs(:send_sighup).never
expect {@orchestrator.remove_nodes(
@reporter,
task_id="task_id",
engine_attrs,
nodes,
reboot=true,
raise_if_error=true
)}.to raise_error(/Mcollective problem with nodes/)
end
it 'should raise error if nodes removing operation via mcollective failed(inaccessible)' do
Astute::NodesRemover.any_instance.expects(:remove).once.returns({
'inaccessible_nodes' => [{"uid"=>"1"}]
})
Astute::Rsyslogd.stubs(:send_sighup).never
expect {@orchestrator.remove_nodes(
@reporter,
task_id="task_id",
engine_attrs,
nodes,
reboot=true,
raise_if_error=true
)}.to raise_error(/Mcollective problem with nodes/)
end
end #exception
context 'cobbler' do
it 'should remove nodes from cobbler if node exist' do
Astute::Provision::Cobbler.any_instance.stubs(:system_exists?).returns(true).twice
Astute::NodesRemover.any_instance.stubs(:remove).once.returns({})
Astute::Rsyslogd.expects(:send_sighup).once
Astute::Provision::Cobbler.any_instance.expects(:remove_system).with(nodes.first['slave_name'])
@orchestrator.remove_nodes(@reporter, task_id="task_id", engine_attrs, nodes, reboot=true)
end
it 'should not try to remove nodes from cobbler if node do not exist' do
Astute::Provision::Cobbler.any_instance.stubs(:system_exists?).returns(false)
Astute::NodesRemover.any_instance.stubs(:remove).once.returns({})
Astute::Rsyslogd.expects(:send_sighup).once
Astute::Provision::Cobbler.any_instance.expects(:remove_system).with(nodes.first['slave_name']).never
@orchestrator.remove_nodes(@reporter, task_id="task_id", engine_attrs, nodes, reboot=true)
end
it 'should inform about nodes if remove operation fail' do
Astute::Provision::Cobbler.any_instance.stubs(:system_exists?)
.returns(true)
.then.returns(true)
Astute::NodesRemover.any_instance.stubs(:remove).once.returns({})
Astute::Rsyslogd.expects(:send_sighup).once
Astute::Provision::Cobbler.any_instance.expects(:remove_system).with(nodes.first['slave_name'])
@orchestrator.remove_nodes(@reporter, task_id="task_id", engine_attrs, nodes, reboot=true)
end
end #cobbler
end #remove_nodes
describe '#deploy' do
it "calls with valid arguments without nailgun hooks" do
nodes = [{'uid' => 1, 'role' => 'controller'}]
@ -254,462 +129,6 @@ describe Astute::Orchestrator do
}
end
describe '#provision' do
context 'cobler cases' do
it "raise error if cobler settings empty" do
@orchestrator.stubs(:watch_provision_progress).returns(nil)
expect {@orchestrator.provision(@reporter, data['task_uuid'], {}, data['nodes'])}.
to raise_error(/Settings for Cobbler must be set/)
end
end
context 'node state cases' do
before(:each) do
remote = mock() do
stubs(:call)
stubs(:call).with('login', 'cobbler', 'cobbler').returns('remotetoken')
end
XMLRPC::Client = mock() do
stubs(:new).returns(remote)
end
@orchestrator.stubs(:remove_nodes).returns([])
Astute::CobblerManager.any_instance.stubs(:sleep)
end
before(:each) do
@orchestrator.stubs(:watch_provision_progress).returns(nil)
@orchestrator.stubs(:control_reboot_using_ssh).returns(nil)
end
it "raises error if nodes list is empty" do
expect {@orchestrator.provision(@reporter, data['task_uuid'], data['engine'], {})}.
to raise_error(/Nodes to provision are not provided!/)
end
it "try to reboot nodes from list" do
Astute::Provision::Cobbler.any_instance do
expects(:power_reboot).with('controller-1')
end
Astute::CobblerManager.any_instance.stubs(:check_reboot_nodes).returns([])
@orchestrator.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
end
before(:each) { Astute::Provision::Cobbler.any_instance.stubs(:power_reboot).returns(333) }
context 'node reboot success' do
before(:each) { Astute::Provision::Cobbler.any_instance.stubs(:event_status).
returns([Time.now.to_f, 'controller-1', 'complete'])}
it "does not find failed nodes" do
Astute::Provision::Cobbler.any_instance.stubs(:event_status).
returns([Time.now.to_f, 'controller-1', 'complete'])
@orchestrator.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
end
it "sync engine state" do
Astute::Provision::Cobbler.any_instance do
expects(:sync).once
end
@orchestrator.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
end
it "should erase mbr for nodes" do
@orchestrator.expects(:remove_nodes).with(
@reporter,
data['task_uuid'],
data['engine'],
data['nodes'],
reboot=false,
fail_if_error=true
).returns([])
@orchestrator.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
end
it 'should not try to unlock node discovery' do
@orchestrator.expects(:unlock_nodes_discovery).never
@orchestrator.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
end
it 'should try to reboot nodes using ssh(insurance for cobbler)' do
@orchestrator.expects(:control_reboot_using_ssh).with(@reporter, data['task_uuid'], data['nodes']).once
@orchestrator.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
end
end
context 'node reboot fail' do
  before(:each) do
    # Cobbler reports the reboot task as 'failed' for every node.
    Astute::Provision::Cobbler.any_instance
      .stubs(:event_status)
      .returns([Time.now.to_f, 'controller-1', 'failed'])
    @orchestrator.stubs(:unlock_nodes_discovery)
  end

  it "should sync engine state" do
    # NOTE(review): `any_instance do ... end` is a no-op in mocha — the block
    # is never evaluated, so no :sync expectation is actually set. Confirm
    # intent; likely meant `any_instance.expects(:sync).once`.
    Astute::Provision::Cobbler.any_instance do
      expects(:sync).once
    end
    begin
      @orchestrator.stubs(:watch_provision_progress).returns(nil)
      @orchestrator.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
    rescue
    end
  end

  it "raise error if failed node find" do
    expect do
      @orchestrator.stubs(:watch_provision_progress).returns(nil)
      @orchestrator.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
    end.to raise_error(Astute::FailedToRebootNodesError)
  end

  it "should try to unlock nodes discovery" do
    @orchestrator.expects(:unlock_nodes_discovery)
    begin
      @orchestrator.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
    rescue
    end
  end

  it 'should not try to reboot nodes using ssh(insurance for cobbler)' do
    @orchestrator.expects(:control_reboot_using_ssh).never
    begin
      @orchestrator.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
    rescue
    end
  end
end
end
end
# Progress tracking: watch_provision_progress polls node types via
# mcollective, parses provision logs for progress and reports the final
# status ('ready'/'error') to the upstream reporter.
describe '#watch_provision_progress' do
  before(:each) do
    # Disable sleeping in test env (doubles the test speed)
    def @orchestrator.sleep_not_greater_than(time, &block)
      block.call
    end
  end

  it "raises error if nodes list is empty" do
    expect {@orchestrator.watch_provision_progress(@reporter, data['task_uuid'], {})}.
      to raise_error(/Nodes to provision are not provided!/)
  end

  it "prepare provision log for parsing" do
    # NOTE(review): mocha's `any_instance` ignores an attached block, so this
    # sets no expectation; likely meant
    # `any_instance.expects(:prepare).with(data['nodes']).once` — confirm.
    Astute::LogParser::ParseProvisionLogs.any_instance do
      expects(:prepare).with(data['nodes']).once
    end
    @orchestrator.stubs(:report_about_progress).returns()
    @orchestrator.stubs(:node_type).returns([{'uid' => '1', 'node_type' => 'target' }])
    @orchestrator.watch_provision_progress(@reporter, data['task_uuid'], data['nodes'])
  end

  it "ignore problem with parsing provision log" do
    # Log-parser failures must not abort provisioning progress tracking.
    Astute::LogParser::ParseProvisionLogs.any_instance do
      stubs(:prepare).with(data['nodes']).raises
    end
    @orchestrator.stubs(:report_about_progress).returns()
    @orchestrator.stubs(:node_type).returns([{'uid' => '1', 'node_type' => 'target' }])
    @orchestrator.watch_provision_progress(@reporter, data['task_uuid'], data['nodes'])
  end

  it 'provision nodes using mclient' do
    @orchestrator.stubs(:report_about_progress).returns()
    @orchestrator.expects(:node_type).returns([{'uid' => '1', 'node_type' => 'target' }])
    @orchestrator.watch_provision_progress(@reporter, data['task_uuid'], data['nodes'])
  end

  it "fail if timeout of provisioning is exceeded" do
    Astute::LogParser::ParseProvisionLogs.any_instance do
      stubs(:prepare).returns()
    end
    # Simulate an expired provisioning deadline.
    Timeout.stubs(:timeout).raises(Timeout::Error)
    msg = 'Timeout of provisioning is exceeded'
    error_msg = {
      'status' => 'error',
      'error' => msg,
      'progress' => 100,
      'nodes' => [{
        'uid' => '1',
        'status' => 'error',
        'error_msg' => msg,
        'progress' => 100,
        'error_type' => 'provision'}]}
    @reporter.expects(:report).with(error_msg).once
    @orchestrator.watch_provision_progress(@reporter, data['task_uuid'], data['nodes'])
  end

  it 'success report if all nodes were provisioned' do
    @orchestrator.stubs(:report_about_progress).returns()
    @orchestrator.expects(:node_type).returns([{'uid' => '1', 'node_type' => 'target' }])
    @orchestrator.stubs(:analize_node_types).returns([['1'], []])
    success_msg = {
      'status' => 'ready',
      'progress' => 100,
      'nodes' => [{
        'uid' => '1',
        'status' => 'provisioned',
        'progress' => 100}]}
    @reporter.expects(:report).with(success_msg).once
    @orchestrator.watch_provision_progress(@reporter, data['task_uuid'], data['nodes'])
  end

  it 'success report if all nodes report about success at least once' do
    # Node '1' reports 'target' on the first poll, node '2' on the second:
    # a node only has to be seen provisioned once.
    nodes = [
      { 'uid' => '1'},
      { 'uid' => '2'}
    ]
    @orchestrator.stubs(:report_about_progress).returns()
    @orchestrator.stubs(:node_type)
      .returns([{'uid' => '1', 'node_type' => 'target' }])
      .then.returns([{'uid' => '2', 'node_type' => 'target' }])
    success_msg = {
      'status' => 'ready',
      'progress' => 100,
      'nodes' => [{
        'uid' => '1',
        'status' => 'provisioned',
        'progress' => 100},
        {
        'uid' => '2',
        'status' => 'provisioned',
        'progress' => 100}
      ]}
    @reporter.expects(:report).with(success_msg).once
    @orchestrator.watch_provision_progress(@reporter, data['task_uuid'], nodes)
  end

  it "unexpected bootstrap nodes should be erased and rebooted" do
    # Node '2' keeps answering as 'bootstrap' — it must be erased (twice)
    # until it finally reports 'target'.
    nodes = [
      { 'uid' => '1'},
      { 'uid' => '2'}
    ]
    @orchestrator.stubs(:report_about_progress).returns()
    @orchestrator.stubs(:node_type)
      .returns([{'uid' => '1', 'node_type' => 'target' }])
      .then.returns([{'uid' => '2', 'node_type' => 'bootstrap' }])
      .then.returns([{'uid' => '2', 'node_type' => 'bootstrap' }])
      .then.returns([{'uid' => '2', 'node_type' => 'target' }])
    Astute::NodesRemover.any_instance.expects(:remove)
      .twice.returns({"nodes"=>[{"uid"=>"2", }]})
    success_msg = {
      'status' => 'ready',
      'progress' => 100,
      'nodes' => [{
        'uid' => '1',
        'status' => 'provisioned',
        'progress' => 100},
        {
        'uid' => '2',
        'status' => 'provisioned',
        'progress' => 100}
      ]}
    @reporter.expects(:report).with(success_msg).once
    @orchestrator.watch_provision_progress(@reporter, data['task_uuid'], nodes)
  end
end
# stop_provision: erase/reboot nodes via ssh, fall back to mcollective
# erasure, and always clean Cobbler state.
describe '#stop_provision' do
  around(:each) do |example|
    # Save and restore the global Astute config these examples mutate.
    old_ssh_retries = Astute.config.ssh_retries
    old_mc_retries = Astute.config.mc_retries
    old_nodes_rm_interal = Astute.config.nodes_remove_interval
    example.run
    Astute.config.ssh_retries = old_ssh_retries
    Astute.config.mc_retries = old_mc_retries
    Astute.config.nodes_remove_interval = old_nodes_rm_interal
  end

  before(:each) do
    # Single attempt, no delay — keeps the examples fast.
    Astute.config.ssh_retries = 1
    Astute.config.mc_retries = 1
    Astute.config.nodes_remove_interval = 0
  end

  it 'erase nodes using ssh' do
    Astute::CobblerManager.any_instance.stubs(:remove_nodes).returns([])
    @orchestrator.stubs(:stop_provision_via_mcollective).returns([[], {}])
    Astute::Ssh.stubs(:execute).returns({'inaccessible_nodes' => [{'uid' => 1}]}).once
    Astute::Ssh.expects(:execute).with(instance_of(Astute::Context),
                                       data['nodes'],
                                       Astute::SshEraseNodes.command)
                                 .returns({'nodes' => [{'uid' => 1}]})
    expect(@orchestrator.stop_provision(@reporter,
                                        data['task_uuid'],
                                        data['engine'],
                                        data['nodes']))
          .to eql({
                  "error_nodes" => [],
                  "inaccessible_nodes" => [],
                  "nodes" => [{"uid"=>1}]
                  })
  end

  it 'always remove nodes from Cobbler' do
    # Cobbler cleanup happens even when every node is ssh-inaccessible.
    Astute::Ssh.stubs(:execute).twice.returns({'inaccessible_nodes' => [{'uid' => 1}]})
    @orchestrator.stubs(:stop_provision_via_mcollective).returns([[], {}])
    Astute::CobblerManager.any_instance.expects(:remove_nodes)
                                       .with(data['nodes'])
                                       .returns([])
    @orchestrator.stop_provision(@reporter,
                                 data['task_uuid'],
                                 data['engine'],
                                 data['nodes'])
  end

  it 'reboot nodes using ssh' do
    Astute::CobblerManager.any_instance.stubs(:remove_nodes).returns([])
    @orchestrator.stubs(:stop_provision_via_mcollective).returns([[], {}])
    Astute::Ssh.stubs(:execute).returns({'nodes' => [{'uid' => 1}]}).once
    Astute::Ssh.expects(:execute).with(instance_of(Astute::Context),
                                       data['nodes'],
                                       Astute::SshHardReboot.command,
                                       timeout=5,
                                       retries=1)
                                 .returns({'inaccessible_nodes' => [{'uid' => 1}]})
    expect(@orchestrator.stop_provision(@reporter,
                                        data['task_uuid'],
                                        data['engine'],
                                        data['nodes']))
          .to eql({
                  "error_nodes" => [],
                  "inaccessible_nodes" => [],
                  "nodes" => [{"uid"=>1}]
                  })
  end

  it 'stop provision if provision operation stop immediately' do
    # Node still in bootstrap => it was stopped before provisioning started.
    @orchestrator.stubs(:stop_provision_via_ssh)
                 .returns({'inaccessible_nodes' => [{'uid' => '1'}]})
    @orchestrator.stubs(:node_type).returns([{'uid' => '1', 'node_type' => 'bootstrap'}])
    Astute::NodesRemover.any_instance.expects(:remove)
                        .once.returns({"nodes"=>[{"uid"=>"1", }]})
    expect(@orchestrator.stop_provision(@reporter,
                                        data['task_uuid'],
                                        data['engine'],
                                        data['nodes']))
          .to eql({
                  "error_nodes" => [],
                  "inaccessible_nodes" => [],
                  "nodes" => [{"uid"=>"1"}]
                  })
  end

  it 'stop provision if provision operation stop in the end' do
    # Node already reports 'target' => provisioning finished before the stop.
    @orchestrator.stubs(:stop_provision_via_ssh)
                 .returns({'nodes' => [{'uid' => "1"}]})
    @orchestrator.stubs(:node_type).returns([{'uid' => "1", 'node_type' => 'target'}])
    Astute::NodesRemover.any_instance.expects(:remove)
                        .once.returns({"nodes"=>[{"uid"=>"1", }]})
    expect(@orchestrator.stop_provision(@reporter,
                                        data['task_uuid'],
                                        data['engine'],
                                        data['nodes']))
          .to eql({
                  "error_nodes" => [],
                  "inaccessible_nodes" => [],
                  "nodes" => [{"uid"=>"1"}]
                  })
  end

  it 'inform about inaccessible nodes' do
    Astute::Ssh.stubs(:execute).returns({'inaccessible_nodes' => [{'uid' => 1}]}).twice
    Astute::CobblerManager.any_instance.stubs(:remove_nodes).returns([])
    @orchestrator.stubs(:node_type).returns([])
    Astute::NodesRemover.any_instance.expects(:remove).never
    expect(@orchestrator.stop_provision(@reporter,
                                        data['task_uuid'],
                                        data['engine'],
                                        data['nodes']))
          .to eql({
                  "error_nodes" => [],
                  "inaccessible_nodes" => [{"uid"=>1}],
                  "nodes" => []
                  })
  end

  it 'sleep between attempts to find and erase nodes using mcollective' do
    @orchestrator.stubs(:stop_provision_via_ssh)
                 .returns({'inaccessible_nodes' => [{'uid' => '1'}]})
    @orchestrator.stubs(:node_type).returns([{'uid' => '1', 'node_type' => 'bootstrap'}])
    Astute::NodesRemover.any_instance.stubs(:remove)
                        .once.returns({"nodes"=>[{"uid"=>"1", }]})
    @orchestrator.expects(:sleep).with(Astute.config.nodes_remove_interval)
    @orchestrator.stop_provision(@reporter,
                                 data['task_uuid'],
                                 data['engine'],
                                 data['nodes'])
  end

  it 'perform several attempts to find and erase nodes using mcollective' do
    Astute.config.mc_retries = 2
    Astute.config.nodes_remove_interval = 0
    @orchestrator.stubs(:stop_provision_via_ssh)
                 .returns({'nodes' => [{'uid' => "1"}],
                           'inaccessible_nodes' => [{'uid' => '2'}]})
    @orchestrator.stubs(:node_type).twice
                 .returns([{'uid' => '1', 'node_type' => 'bootstrap'}])
                 .then.returns([{'uid' => '2', 'node_type' => 'target'}])
    Astute::NodesRemover.any_instance.stubs(:remove).twice
                        .returns({"nodes"=>[{"uid"=>"1"}]}).then
                        .returns({"error_nodes"=>[{"uid"=>"2"}]})
    data['nodes'] << {
      "uid" => '2',
      "slave_name"=>"controller-2",
      "admin_ip" =>'1.2.3.6'
    }
    expect(@orchestrator.stop_provision(@reporter,
                                        data['task_uuid'],
                                        data['engine'],
                                        data['nodes']))
          .to eql({
                  "error_nodes" => [{"uid"=>'2'}],
                  "inaccessible_nodes" => [],
                  "nodes" => [{"uid"=>"1"}],
                  "status" => "error"
                  })
  end
end # stop_provision
describe '#execute_tasks' do
it 'should execute tasks using nailgun hooks' do
@orchestrator.stubs(:report_result)

663
spec/unit/provision_spec.rb Normal file
View File

@ -0,0 +1,663 @@
# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
require File.join(File.dirname(__FILE__), '../spec_helper')
describe Astute::Provisioner do
include SpecHelpers
before(:each) do
  # Fresh Provisioner and a quiet reporter double for every example.
  @provisioner = Astute::Provisioner.new
  @reporter = mock('reporter')
  @reporter.stub_everything
end
# node_type: asks the 'systemtype' mcollective agent and maps replies to
# {'uid' => ..., 'node_type' => ...} hashes.
describe '#node_type' do
  it "must be able to return node type" do
    nodes = [{'uid' => '1'}]
    # Canned mcollective reply: node "1" answers 'target'.
    res = {:data => {:node_type => 'target'},
           :sender=>"1"}
    mc_res = mock_mc_result(res)
    mc_timeout = 5
    rpcclient = mock_rpcclient(nodes, mc_timeout)
    rpcclient.expects(:get_type).once.returns([mc_res])
    types = @provisioner.node_type(@reporter, 'task_uuid', nodes.map { |n| n['uid'] }, mc_timeout)
    types.should eql([{"node_type"=>"target", "uid"=>"1"}])
  end
end
# remove_nodes: erases nodes via NodesRemover (mcollective), removes their
# Cobbler systems and SIGHUPs rsyslogd to reopen per-node log files.
describe '#remove_nodes' do
  let(:nodes) { [{'uid' => '1', 'slave_name' => ''}] }
  let(:engine_attrs) do
    {
      "url"=>"http://localhost/cobbler_api",
      "username"=>"cobbler",
      "password"=>"cobbler",
      "master_ip"=>"127.0.0.1",
    }
  end

  before(:each) do
    # Stub the XML-RPC transport so no real Cobbler server is contacted.
    # NOTE(review): reassigning the XMLRPC::Client constant leaks across
    # examples and triggers a "already initialized constant" warning —
    # consider `XMLRPC::Client.stubs(:new).returns(remote)` instead.
    remote = mock() do
      stubs(:call)
      stubs(:call).with('login', 'cobbler', 'cobbler').returns('remotetoken')
    end
    XMLRPC::Client = mock() do
      stubs(:new).returns(remote)
    end
  end

  it 'should use NodeRemover to remove nodes' do
    Astute::NodesRemover.any_instance.expects(:remove).once.returns({})
    Astute::Rsyslogd.expects(:send_sighup).once
    @provisioner.remove_nodes(@reporter, task_id="task_id", engine_attrs, nodes, reboot=true)
  end

  it 'should return list of nodes which removed' do
    Astute::NodesRemover.any_instance.expects(:remove).once.returns({"nodes"=>[{"uid"=>"1"}]})
    Astute::Rsyslogd.stubs(:send_sighup).once
    expect(@provisioner.remove_nodes(
      @reporter,
      task_id="task_id",
      engine_attrs,
      nodes,
      reboot=true
    )).to eql({"nodes"=>[{"uid"=>"1"}]})
  end

  context 'if exception in case of error enable' do
    it 'should raise error if nodes removing operation via mcollective failed(error)' do
      Astute::NodesRemover.any_instance.expects(:remove).once.returns({
        'status' => 'error',
        'error_nodes' => [{"uid"=>"1"}]
      })
      Astute::Rsyslogd.stubs(:send_sighup).never
      expect {@provisioner.remove_nodes(
        @reporter,
        task_id="task_id",
        engine_attrs,
        nodes,
        reboot=true,
        raise_if_error=true
      )}.to raise_error(/Mcollective problem with nodes/)
    end

    it 'should raise error if nodes removing operation via mcollective failed(inaccessible)' do
      Astute::NodesRemover.any_instance.expects(:remove).once.returns({
        'inaccessible_nodes' => [{"uid"=>"1"}]
      })
      Astute::Rsyslogd.stubs(:send_sighup).never
      expect {@provisioner.remove_nodes(
        @reporter,
        task_id="task_id",
        engine_attrs,
        nodes,
        reboot=true,
        raise_if_error=true
      )}.to raise_error(/Mcollective problem with nodes/)
    end
  end #exception

  context 'cobbler' do
    it 'should remove nodes from cobbler if node exist' do
      Astute::Provision::Cobbler.any_instance.stubs(:system_exists?).returns(true).twice
      Astute::NodesRemover.any_instance.stubs(:remove).once.returns({})
      Astute::Rsyslogd.expects(:send_sighup).once
      Astute::Provision::Cobbler.any_instance.expects(:remove_system).with(nodes.first['slave_name'])
      @provisioner.remove_nodes(@reporter, task_id="task_id", engine_attrs, nodes, reboot=true)
    end

    it 'should not try to remove nodes from cobbler if node do not exist' do
      Astute::Provision::Cobbler.any_instance.stubs(:system_exists?).returns(false)
      Astute::NodesRemover.any_instance.stubs(:remove).once.returns({})
      Astute::Rsyslogd.expects(:send_sighup).once
      Astute::Provision::Cobbler.any_instance.expects(:remove_system).with(nodes.first['slave_name']).never
      @provisioner.remove_nodes(@reporter, task_id="task_id", engine_attrs, nodes, reboot=true)
    end

    it 'should inform about nodes if remove operation fail' do
      # system_exists? keeps returning true (removal did not take effect).
      Astute::Provision::Cobbler.any_instance.stubs(:system_exists?)
                                             .returns(true)
                                             .then.returns(true)
      Astute::NodesRemover.any_instance.stubs(:remove).once.returns({})
      Astute::Rsyslogd.expects(:send_sighup).once
      Astute::Provision::Cobbler.any_instance.expects(:remove_system).with(nodes.first['slave_name'])
      @provisioner.remove_nodes(@reporter, task_id="task_id", engine_attrs, nodes, reboot=true)
    end
  end #cobbler
end #remove_nodes
# Shared provisioning payload: a Cobbler engine definition plus a single
# node ('controller-1') with full power/network/kickstart metadata, in the
# shape the provisioner receives from Nailgun.
let(:data) do
  {
    "engine"=>{
      "url"=>"http://localhost/cobbler_api",
      "username"=>"cobbler",
      "password"=>"cobbler",
      "master_ip"=>"127.0.0.1",
      "provision_method"=>"cobbler",
    },
    "task_uuid"=>"a5c44b9a-285a-4a0c-ae65-2ed6b3d250f4",
    "nodes" => [
      {
        'uid' => '1',
        'profile' => 'centos-x86_64',
        "slave_name"=>"controller-1",
        "admin_ip" =>'1.2.3.5',
        'power_type' => 'ssh',
        'power_user' => 'root',
        'power_pass' => '/root/.ssh/bootstrap.rsa',
        'power-address' => '1.2.3.5',
        'hostname' => 'name.domain.tld',
        'name_servers' => '1.2.3.4 1.2.3.100',
        'name_servers_search' => 'some.domain.tld domain.tld',
        'netboot_enabled' => '1',
        'ks_meta' => 'some_param=1 another_param=2',
        'interfaces' => {
          'eth0' => {
            'mac_address' => '00:00:00:00:00:00',
            'static' => '1',
            'netmask' => '255.255.255.0',
            'ip_address' => '1.2.3.5',
            'dns_name' => 'node.mirantis.net',
          },
          'eth1' => {
            'mac_address' => '00:00:00:00:00:01',
            'static' => '0',
            'netmask' => '255.255.255.0',
            'ip_address' => '1.2.3.6',
          }
        },
        'interfaces_extra' => {
          'eth0' => {
            'peerdns' => 'no',
            'onboot' => 'yes',
          },
          'eth1' => {
            'peerdns' => 'no',
            'onboot' => 'yes',
          }
        }
      }
    ]
  }
end
# provision: registers nodes in Cobbler, reboots them and hands control to
# watch_provision_progress (stubbed out in these examples).
describe '#provision' do
  context 'cobbler cases' do
    it "raise error if cobbler settings empty" do
      @provisioner.stubs(:watch_provision_progress).returns(nil)
      expect {@provisioner.provision(@reporter, data['task_uuid'], {}, data['nodes'])}.
        to raise_error(/Settings for Cobbler must be set/)
    end
  end

  context 'node state cases' do
    before(:each) do
      # Stub the XML-RPC transport so no real Cobbler server is contacted.
      remote = mock() do
        stubs(:call)
        stubs(:call).with('login', 'cobbler', 'cobbler').returns('remotetoken')
      end
      XMLRPC::Client = mock() do
        stubs(:new).returns(remote)
      end
      @provisioner.stubs(:remove_nodes).returns([])
      Astute::CobblerManager.any_instance.stubs(:sleep)
    end

    before(:each) do
      @provisioner.stubs(:watch_provision_progress).returns(nil)
      @provisioner.stubs(:control_reboot_using_ssh).returns(nil)
    end

    it "raises error if nodes list is empty" do
      expect {@provisioner.provision(@reporter, data['task_uuid'], data['engine'], {})}.
        to raise_error(/Nodes to provision are not provided!/)
    end

    it "try to reboot nodes from list" do
      # NOTE(review): `any_instance do ... end` is a no-op in mocha — the
      # block is never evaluated; likely meant
      # `any_instance.expects(:power_reboot).with('controller-1')`. Confirm.
      Astute::Provision::Cobbler.any_instance do
        expects(:power_reboot).with('controller-1')
      end
      Astute::CobblerManager.any_instance.stubs(:check_reboot_nodes).returns([])
      @provisioner.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
    end

    before(:each) { Astute::Provision::Cobbler.any_instance.stubs(:power_reboot).returns(333) }

    context 'node reboot success' do
      before(:each) { Astute::Provision::Cobbler.any_instance.stubs(:event_status).
                                        returns([Time.now.to_f, 'controller-1', 'complete'])}

      it "does not find failed nodes" do
        Astute::Provision::Cobbler.any_instance.stubs(:event_status).
                                        returns([Time.now.to_f, 'controller-1', 'complete'])
        @provisioner.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
      end

      it "sync engine state" do
        # NOTE(review): no-op `any_instance` block — see note above; confirm.
        Astute::Provision::Cobbler.any_instance do
          expects(:sync).once
        end
        @provisioner.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
      end

      it "should erase mbr for nodes" do
        @provisioner.expects(:remove_nodes).with(
          @reporter,
          data['task_uuid'],
          data['engine'],
          data['nodes'],
          reboot=false,
          fail_if_error=true
        ).returns([])
        @provisioner.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
      end

      it 'should not try to unlock node discovery' do
        @provisioner.expects(:unlock_nodes_discovery).never
        @provisioner.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
      end

      it 'should try to reboot nodes using ssh(insurance for cobbler)' do
        @provisioner.expects(:control_reboot_using_ssh).with(@reporter, data['task_uuid'], data['nodes']).once
        @provisioner.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
      end
    end

    context 'node reboot fail' do
      before(:each) do
        # Cobbler reports the reboot task as 'failed' for every node.
        Astute::Provision::Cobbler.any_instance
                                  .stubs(:event_status)
                                  .returns([Time.now.to_f, 'controller-1', 'failed'])
        @provisioner.stubs(:unlock_nodes_discovery)
      end

      it "should sync engine state" do
        # NOTE(review): no-op `any_instance` block — see note above; confirm.
        Astute::Provision::Cobbler.any_instance do
          expects(:sync).once
        end
        begin
          @provisioner.stubs(:watch_provision_progress).returns(nil)
          @provisioner.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
        rescue
        end
      end

      it "raise error if failed node find" do
        expect do
          @provisioner.stubs(:watch_provision_progress).returns(nil)
          @provisioner.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
        end.to raise_error(Astute::FailedToRebootNodesError)
      end

      it "should try to unlock nodes discovery" do
        @provisioner.expects(:unlock_nodes_discovery)
        begin
          @provisioner.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
        rescue
        end
      end

      it 'should not try to reboot nodes using ssh(insurance for cobbler)' do
        @provisioner.expects(:control_reboot_using_ssh).never
        begin
          @provisioner.provision(@reporter, data['task_uuid'], data['engine'], data['nodes'])
        rescue
        end
      end
    end
  end
end
# Progress tracking: watch_provision_progress polls node types via
# mcollective, parses provision logs for progress and reports the final
# status ('ready'/'error') to the upstream reporter.
describe '#watch_provision_progress' do
  before(:each) do
    # Disable sleeping in test env (doubles the test speed)
    def @provisioner.sleep_not_greater_than(time, &block)
      block.call
    end
  end

  it "raises error if nodes list is empty" do
    expect {@provisioner.watch_provision_progress(@reporter, data['task_uuid'], {})}.
      to raise_error(/Nodes to provision are not provided!/)
  end

  it "prepare provision log for parsing" do
    # NOTE(review): mocha's `any_instance` ignores an attached block, so this
    # sets no expectation; likely meant
    # `any_instance.expects(:prepare).with(data['nodes']).once` — confirm.
    Astute::LogParser::ParseProvisionLogs.any_instance do
      expects(:prepare).with(data['nodes']).once
    end
    @provisioner.stubs(:report_about_progress).returns()
    @provisioner.stubs(:node_type).returns([{'uid' => '1', 'node_type' => 'target' }])
    @provisioner.watch_provision_progress(@reporter, data['task_uuid'], data['nodes'])
  end

  it "ignore problem with parsing provision log" do
    # Log-parser failures must not abort provisioning progress tracking.
    Astute::LogParser::ParseProvisionLogs.any_instance do
      stubs(:prepare).with(data['nodes']).raises
    end
    @provisioner.stubs(:report_about_progress).returns()
    @provisioner.stubs(:node_type).returns([{'uid' => '1', 'node_type' => 'target' }])
    @provisioner.watch_provision_progress(@reporter, data['task_uuid'], data['nodes'])
  end

  it 'provision nodes using mclient' do
    @provisioner.stubs(:report_about_progress).returns()
    @provisioner.expects(:node_type).returns([{'uid' => '1', 'node_type' => 'target' }])
    @provisioner.watch_provision_progress(@reporter, data['task_uuid'], data['nodes'])
  end

  it "fail if timeout of provisioning is exceeded" do
    Astute::LogParser::ParseProvisionLogs.any_instance do
      stubs(:prepare).returns()
    end
    # Simulate an expired provisioning deadline.
    Timeout.stubs(:timeout).raises(Timeout::Error)
    msg = 'Timeout of provisioning is exceeded'
    error_msg = {
      'status' => 'error',
      'error' => msg,
      'progress' => 100,
      'nodes' => [{
        'uid' => '1',
        'status' => 'error',
        'error_msg' => msg,
        'progress' => 100,
        'error_type' => 'provision'}]}
    @reporter.expects(:report).with(error_msg).once
    @provisioner.watch_provision_progress(@reporter, data['task_uuid'], data['nodes'])
  end

  it 'success report if all nodes were provisioned' do
    @provisioner.stubs(:report_about_progress).returns()
    @provisioner.expects(:node_type).returns([{'uid' => '1', 'node_type' => 'target' }])
    @provisioner.stubs(:analize_node_types).returns([['1'], []])
    success_msg = {
      'status' => 'ready',
      'progress' => 100,
      'nodes' => [{
        'uid' => '1',
        'status' => 'provisioned',
        'progress' => 100}]}
    @reporter.expects(:report).with(success_msg).once
    @provisioner.watch_provision_progress(@reporter, data['task_uuid'], data['nodes'])
  end

  it 'success report if all nodes report about success at least once' do
    # Node '1' reports 'target' on the first poll, node '2' on the second:
    # a node only has to be seen provisioned once.
    nodes = [
      { 'uid' => '1'},
      { 'uid' => '2'}
    ]
    @provisioner.stubs(:report_about_progress).returns()
    @provisioner.stubs(:node_type)
                .returns([{'uid' => '1', 'node_type' => 'target' }])
                .then.returns([{'uid' => '2', 'node_type' => 'target' }])
    success_msg = {
      'status' => 'ready',
      'progress' => 100,
      'nodes' => [{
        'uid' => '1',
        'status' => 'provisioned',
        'progress' => 100},
        {
        'uid' => '2',
        'status' => 'provisioned',
        'progress' => 100}
      ]}
    @reporter.expects(:report).with(success_msg).once
    @provisioner.watch_provision_progress(@reporter, data['task_uuid'], nodes)
  end

  it "unexpected bootstrap nodes should be erased and rebooted" do
    # Node '2' keeps answering as 'bootstrap' — it must be erased (twice)
    # until it finally reports 'target'.
    nodes = [
      { 'uid' => '1'},
      { 'uid' => '2'}
    ]
    @provisioner.stubs(:report_about_progress).returns()
    @provisioner.stubs(:node_type)
                .returns([{'uid' => '1', 'node_type' => 'target' }])
                .then.returns([{'uid' => '2', 'node_type' => 'bootstrap' }])
                .then.returns([{'uid' => '2', 'node_type' => 'bootstrap' }])
                .then.returns([{'uid' => '2', 'node_type' => 'target' }])
    Astute::NodesRemover.any_instance.expects(:remove)
                        .twice.returns({"nodes"=>[{"uid"=>"2", }]})
    success_msg = {
      'status' => 'ready',
      'progress' => 100,
      'nodes' => [{
        'uid' => '1',
        'status' => 'provisioned',
        'progress' => 100},
        {
        'uid' => '2',
        'status' => 'provisioned',
        'progress' => 100}
      ]}
    @reporter.expects(:report).with(success_msg).once
    @provisioner.watch_provision_progress(@reporter, data['task_uuid'], nodes)
  end
end
# stop_provision: erase/reboot nodes via ssh, fall back to mcollective
# erasure, and always clean Cobbler state.
describe '#stop_provision' do
  around(:each) do |example|
    # Save and restore the global Astute config these examples mutate.
    old_ssh_retries = Astute.config.ssh_retries
    old_mc_retries = Astute.config.mc_retries
    old_nodes_rm_interal = Astute.config.nodes_remove_interval
    example.run
    Astute.config.ssh_retries = old_ssh_retries
    Astute.config.mc_retries = old_mc_retries
    Astute.config.nodes_remove_interval = old_nodes_rm_interal
  end

  before(:each) do
    # Single attempt, no delay — keeps the examples fast.
    Astute.config.ssh_retries = 1
    Astute.config.mc_retries = 1
    Astute.config.nodes_remove_interval = 0
  end

  it 'erase nodes using ssh' do
    Astute::CobblerManager.any_instance.stubs(:remove_nodes).returns([])
    @provisioner.stubs(:stop_provision_via_mcollective).returns([[], {}])
    Astute::Ssh.stubs(:execute).returns({'inaccessible_nodes' => [{'uid' => 1}]}).once
    Astute::Ssh.expects(:execute).with(instance_of(Astute::Context),
                                       data['nodes'],
                                       Astute::SshEraseNodes.command)
                                 .returns({'nodes' => [{'uid' => 1}]})
    expect(@provisioner.stop_provision(@reporter,
                                       data['task_uuid'],
                                       data['engine'],
                                       data['nodes']))
          .to eql({
                  "error_nodes" => [],
                  "inaccessible_nodes" => [],
                  "nodes" => [{"uid"=>1}]
                  })
  end

  it 'always remove nodes from Cobbler' do
    # Cobbler cleanup happens even when every node is ssh-inaccessible.
    Astute::Ssh.stubs(:execute).twice.returns({'inaccessible_nodes' => [{'uid' => 1}]})
    @provisioner.stubs(:stop_provision_via_mcollective).returns([[], {}])
    Astute::CobblerManager.any_instance.expects(:remove_nodes)
                                       .with(data['nodes'])
                                       .returns([])
    @provisioner.stop_provision(@reporter,
                                data['task_uuid'],
                                data['engine'],
                                data['nodes'])
  end

  it 'reboot nodes using ssh' do
    Astute::CobblerManager.any_instance.stubs(:remove_nodes).returns([])
    @provisioner.stubs(:stop_provision_via_mcollective).returns([[], {}])
    Astute::Ssh.stubs(:execute).returns({'nodes' => [{'uid' => 1}]}).once
    Astute::Ssh.expects(:execute).with(instance_of(Astute::Context),
                                       data['nodes'],
                                       Astute::SshHardReboot.command,
                                       timeout=5,
                                       retries=1)
                                 .returns({'inaccessible_nodes' => [{'uid' => 1}]})
    expect(@provisioner.stop_provision(@reporter,
                                       data['task_uuid'],
                                       data['engine'],
                                       data['nodes']))
          .to eql({
                  "error_nodes" => [],
                  "inaccessible_nodes" => [],
                  "nodes" => [{"uid"=>1}]
                  })
  end

  it 'stop provision if provision operation stop immediately' do
    # Node still in bootstrap => it was stopped before provisioning started.
    @provisioner.stubs(:stop_provision_via_ssh)
                .returns({'inaccessible_nodes' => [{'uid' => '1'}]})
    @provisioner.stubs(:node_type).returns([{'uid' => '1', 'node_type' => 'bootstrap'}])
    Astute::NodesRemover.any_instance.expects(:remove)
                        .once.returns({"nodes"=>[{"uid"=>"1", }]})
    expect(@provisioner.stop_provision(@reporter,
                                       data['task_uuid'],
                                       data['engine'],
                                       data['nodes']))
          .to eql({
                  "error_nodes" => [],
                  "inaccessible_nodes" => [],
                  "nodes" => [{"uid"=>"1"}]
                  })
  end

  it 'stop provision if provision operation stop in the end' do
    # Node already reports 'target' => provisioning finished before the stop.
    @provisioner.stubs(:stop_provision_via_ssh)
                .returns({'nodes' => [{'uid' => "1"}]})
    @provisioner.stubs(:node_type).returns([{'uid' => "1", 'node_type' => 'target'}])
    Astute::NodesRemover.any_instance.expects(:remove)
                        .once.returns({"nodes"=>[{"uid"=>"1", }]})
    expect(@provisioner.stop_provision(@reporter,
                                       data['task_uuid'],
                                       data['engine'],
                                       data['nodes']))
          .to eql({
                  "error_nodes" => [],
                  "inaccessible_nodes" => [],
                  "nodes" => [{"uid"=>"1"}]
                  })
  end

  it 'inform about inaccessible nodes' do
    Astute::Ssh.stubs(:execute).returns({'inaccessible_nodes' => [{'uid' => 1}]}).twice
    Astute::CobblerManager.any_instance.stubs(:remove_nodes).returns([])
    @provisioner.stubs(:node_type).returns([])
    Astute::NodesRemover.any_instance.expects(:remove).never
    expect(@provisioner.stop_provision(@reporter,
                                       data['task_uuid'],
                                       data['engine'],
                                       data['nodes']))
          .to eql({
                  "error_nodes" => [],
                  "inaccessible_nodes" => [{"uid"=>1}],
                  "nodes" => []
                  })
  end

  it 'sleep between attempts to find and erase nodes using mcollective' do
    @provisioner.stubs(:stop_provision_via_ssh)
                .returns({'inaccessible_nodes' => [{'uid' => '1'}]})
    @provisioner.stubs(:node_type).returns([{'uid' => '1', 'node_type' => 'bootstrap'}])
    Astute::NodesRemover.any_instance.stubs(:remove)
                        .once.returns({"nodes"=>[{"uid"=>"1", }]})
    @provisioner.expects(:sleep).with(Astute.config.nodes_remove_interval)
    @provisioner.stop_provision(@reporter,
                                data['task_uuid'],
                                data['engine'],
                                data['nodes'])
  end

  it 'perform several attempts to find and erase nodes using mcollective' do
    Astute.config.mc_retries = 2
    Astute.config.nodes_remove_interval = 0
    @provisioner.stubs(:stop_provision_via_ssh)
                .returns({'nodes' => [{'uid' => "1"}],
                          'inaccessible_nodes' => [{'uid' => '2'}]})
    @provisioner.stubs(:node_type).twice
                .returns([{'uid' => '1', 'node_type' => 'bootstrap'}])
                .then.returns([{'uid' => '2', 'node_type' => 'target'}])
    Astute::NodesRemover.any_instance.stubs(:remove).twice
                        .returns({"nodes"=>[{"uid"=>"1"}]}).then
                        .returns({"error_nodes"=>[{"uid"=>"2"}]})
    data['nodes'] << {
      "uid" => '2',
      "slave_name"=>"controller-2",
      "admin_ip" =>'1.2.3.6'
    }
    expect(@provisioner.stop_provision(@reporter,
                                       data['task_uuid'],
                                       data['engine'],
                                       data['nodes']))
          .to eql({
                  "error_nodes" => [{"uid"=>'2'}],
                  "inaccessible_nodes" => [],
                  "nodes" => [{"uid"=>"1"}],
                  "status" => "error"
                  })
  end
end # stop_provision
end