Strong puppet hung check
* add second round for puppetd_runonce check; * change hang message to warn status; * mark hung nodes as error (before was running); * remove unused 'change_node_status' param; * refactoring; * add tests. Change-Id: Iba7ac5d30a1e57842a44c95c3c68f19dd71bccff Closes-Bug: #1256244
This commit is contained in:
parent
d14e9d2475
commit
1b8d6e18a3
|
@ -19,4 +19,4 @@ PUPPET_FADE_TIMEOUT: 60
|
|||
# PUPPET_FADE_INTERVAL is used in puppetd.rb file.
|
||||
# Retry every PUPPET_FADE_INTERVAL seconds to check puppet state if it was
|
||||
# in 'running' state.
|
||||
PUPPET_FADE_INTERVAL: 1
|
||||
PUPPET_FADE_INTERVAL: 5
|
||||
|
|
|
@ -58,7 +58,7 @@ module Astute
|
|||
conf[:PUPPET_FADE_TIMEOUT] = 60 # how long it can take for puppet to exit after dumping to last_run_summary
|
||||
conf[:MC_RETRIES] = 5 # MClient tries to call mcagent before failure
|
||||
conf[:MC_RETRY_INTERVAL] = 1 # MClient sleeps for ## sec between retries
|
||||
conf[:PUPPET_FADE_INTERVAL] = 1 # retry every ## seconds to check puppet state if it was running
|
||||
conf[:PUPPET_FADE_INTERVAL] = 5 # retry every ## seconds to check puppet state if it was running
|
||||
conf[:PROVISIONING_TIMEOUT] = 90 * 60 # timeout for booting target OS in provision
|
||||
conf[:REBOOT_TIMEOUT] = 120 # how long it can take for node to reboot
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ require 'yaml'
|
|||
|
||||
class Astute::DeploymentEngine::NailyFact < Astute::DeploymentEngine
|
||||
|
||||
def deploy_piece(nodes, retries=2, change_node_status=true)
|
||||
def deploy_piece(nodes, retries=2)
|
||||
return false unless validate_nodes(nodes)
|
||||
|
||||
@ctx.reporter.report(nodes_status(nodes, 'deploying', {'progress' => 0}))
|
||||
|
@ -30,7 +30,7 @@ class Astute::DeploymentEngine::NailyFact < Astute::DeploymentEngine
|
|||
nodes.each { |node| upload_facts(node) }
|
||||
Astute.logger.info "#{@ctx.task_id}: Required attrs/metadata passed via facts extension. Starting deployment."
|
||||
|
||||
Astute::PuppetdDeployer.deploy(@ctx, nodes, retries, change_node_status)
|
||||
Astute::PuppetdDeployer.deploy(@ctx, nodes, retries)
|
||||
nodes_roles = nodes.map { |n| {n['uid'] => n['role']} }
|
||||
Astute.logger.info "#{@ctx.task_id}: Finished deployment of nodes => roles: #{nodes_roles.inspect}"
|
||||
end
|
||||
|
|
|
@ -18,6 +18,147 @@ require 'timeout'
|
|||
|
||||
module Astute
|
||||
module PuppetdDeployer
|
||||
|
||||
def self.deploy(ctx, nodes, retries=2)
|
||||
@ctx = ctx
|
||||
@nodes_roles = nodes.inject({}) { |h, n| h.merge({n['uid'] => n['role']}) }
|
||||
@node_retries = nodes.inject({}) { |h, n| h.merge({n['uid'] => retries}) }
|
||||
@nodes = nodes
|
||||
|
||||
Astute.logger.debug "Waiting for puppet to finish deployment on all
|
||||
nodes (timeout = #{Astute.config.PUPPET_TIMEOUT} sec)..."
|
||||
time_before = Time.now
|
||||
|
||||
deploy_nodes(@nodes.map { |n| n['uid'] })
|
||||
|
||||
time_spent = Time.now - time_before
|
||||
Astute.logger.info "#{@ctx.task_id}: Spent #{time_spent} seconds on puppet run "\
|
||||
"for following nodes(uids): #{@nodes.map {|n| n['uid']}.join(',')}"
|
||||
end
|
||||
|
||||
private
|
||||
# Runs puppetd.runonce only if puppet is stopped on the host at the time
|
||||
# If it isn't stopped, we wait a bit and try again.
|
||||
# Returns list of nodes uids which appear to be with hung puppet.
|
||||
def self.puppetd_runonce(uids)
|
||||
started = Time.now.to_i
|
||||
while Time.now.to_i - started < Astute.config.PUPPET_FADE_TIMEOUT
|
||||
running_uids = puppetd(uids).last_run_summary.select { |x|
|
||||
['running', 'idling'].include?(x.results[:data][:status])
|
||||
}.map { |n| n.results[:sender] }
|
||||
stopped_uids = uids - running_uids
|
||||
|
||||
@nodes.select { |n| stopped_uids.include? n['uid'] }
|
||||
.group_by { |n| n['debug'] }
|
||||
.each do |debug, stop_nodes|
|
||||
puppetd(stop_nodes.map { |n| n['uid'] }).runonce(:puppet_debug => debug)
|
||||
end
|
||||
break if running_uids.empty?
|
||||
|
||||
uids = running_uids
|
||||
sleep Astute.config.PUPPET_FADE_INTERVAL
|
||||
end
|
||||
Astute.logger.debug "puppetd_runonce completed within #{Time.now.to_i - started} seconds."
|
||||
Astute.logger.warn "Following nodes have puppet hung: '#{running_uids.join(',')}'" if running_uids.present?
|
||||
running_uids
|
||||
end
|
||||
|
||||
def self.calc_nodes_status(last_run, prev_run, hung_nodes=[])
|
||||
# Finished are those which are not in running state,
|
||||
# and changed their last_run time, which is changed after application of catalog,
|
||||
# at the time of updating last_run_summary file. At that particular time puppet is
|
||||
# still running, and will finish in a couple of seconds.
|
||||
# If Puppet had crashed before it got a catalog (e.g. certificate problems),
|
||||
# it didn't update last_run_summary file and switched to 'stopped' state.
|
||||
|
||||
stopped = last_run.select { |x| ['stopped', 'disabled'].include? x.results[:data][:status] }
|
||||
|
||||
# Select all finished nodes which not failed and changed last_run time.
|
||||
succeed_nodes = stopped.select { |n|
|
||||
prev_n = prev_run.find{|ps| ps.results[:sender] == n.results[:sender] }
|
||||
|
||||
n.results[:data][:status] == 'stopped' &&
|
||||
n.results[:data][:resources]['failed'].to_i == 0 &&
|
||||
n.results[:data][:resources]['failed_to_restart'].to_i == 0 &&
|
||||
n.results[:data][:time]['last_run'] != (prev_n && prev_n.results[:data][:time]['last_run'])
|
||||
}.map{|x| x.results[:sender] }
|
||||
|
||||
stopped_nodes = stopped.map { |x| x.results[:sender] }
|
||||
error_nodes = stopped_nodes - succeed_nodes
|
||||
running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes
|
||||
|
||||
# Hunged nodes can change state at this moment(success, error or still run),
|
||||
# but we should to turn it on only in error_nodes
|
||||
succeed_nodes -= hung_nodes
|
||||
error_nodes = (error_nodes + hung_nodes).uniq
|
||||
running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes - hung_nodes
|
||||
|
||||
|
||||
nodes_to_check = running_nodes + succeed_nodes + error_nodes
|
||||
unless nodes_to_check.size == last_run.size
|
||||
raise "Should never happen. Internal error in nodes statuses calculation. Statuses calculated for: #{nodes_to_check.inspect},"
|
||||
"nodes passed to check statuses of: #{last_run.map {|n| n.results[:sender]}}"
|
||||
end
|
||||
{'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes}
|
||||
end
|
||||
|
||||
|
||||
def self.puppetd(uids)
|
||||
puppetd = MClient.new(@ctx, "puppetd", Array(uids))
|
||||
puppetd.on_respond_timeout do |uids|
|
||||
nodes = uids.map do |uid|
|
||||
{ 'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => @nodes_roles[uid] }
|
||||
end
|
||||
@ctx.report_and_update_status('nodes' => nodes)
|
||||
end
|
||||
puppetd
|
||||
end
|
||||
|
||||
def self.processing_error_nodes(error_nodes)
|
||||
nodes_to_report = []
|
||||
nodes_to_retry = []
|
||||
|
||||
error_nodes.each do |uid|
|
||||
if @node_retries[uid] > 0
|
||||
@node_retries[uid] -= 1
|
||||
Astute.logger.debug "Puppet on node #{uid.inspect} will be restarted. "\
|
||||
"#{@node_retries[uid]} retries remained."
|
||||
nodes_to_retry << uid
|
||||
else
|
||||
Astute.logger.debug "Node #{uid.inspect} has failed to deploy. There is no more retries for puppet run."
|
||||
nodes_to_report << {'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => @nodes_roles[uid] }
|
||||
end
|
||||
end
|
||||
|
||||
return nodes_to_report, nodes_to_retry
|
||||
end
|
||||
|
||||
def self.processing_running_nodes(running_nodes)
|
||||
nodes_to_report = []
|
||||
if running_nodes.present?
|
||||
begin
|
||||
# Pass nodes because logs calculation needs IP address of node, not just uid
|
||||
nodes_progress = @ctx.deploy_log_parser.progress_calculate(running_nodes, @nodes)
|
||||
if nodes_progress.present?
|
||||
Astute.logger.debug "Got progress for nodes: #{nodes_progress.inspect}"
|
||||
# Nodes with progress are running, so they are not included in nodes_to_report yet
|
||||
nodes_progress.map! { |x| x.merge!('status' => 'deploying', 'role' => @nodes_roles[x['uid']]) }
|
||||
nodes_to_report = nodes_progress
|
||||
end
|
||||
rescue => e
|
||||
Astute.logger.warn "Some error occurred when parse logs for nodes progress: #{e.message}, "\
|
||||
"trace: #{e.format_backtrace}"
|
||||
end
|
||||
end
|
||||
nodes_to_report
|
||||
end
|
||||
|
||||
def self.processing_succeed_nodes(succeed_nodes)
|
||||
succeed_nodes.map do |uid|
|
||||
{ 'uid' => uid, 'status' => 'ready', 'role' => @nodes_roles[uid] }
|
||||
end
|
||||
end
|
||||
|
||||
# As I (Andrey Danin) understand, Puppet agent goes through these steps:
|
||||
# * Puppetd has 'stopped' state.
|
||||
# * We run it as a run_once, and puppetd goes to 'idling' state - it trying to
|
||||
|
@ -31,153 +172,39 @@ module Astute
|
|||
# * After puppetd finished all internal jobs connected with finished catalog,
|
||||
# it goes to 'idling' state.
|
||||
# * After a short time it goes to 'stopped' state because we ran it as a run_once.
|
||||
|
||||
private
|
||||
# Runs puppetd.runonce only if puppet is stopped on the host at the time
|
||||
# If it isn't stopped, we wait a bit and try again.
|
||||
# Returns list of nodes uids which appear to be with hung puppet.
|
||||
def self.puppetd_runonce(puppetd, uids, nodes)
|
||||
debug_mode_dict = nodes.inject({}) {|dict, node| dict[node['uid']] = node['debug']; dict}
|
||||
started = Time.now.to_i
|
||||
while Time.now.to_i - started < Astute.config.PUPPET_FADE_TIMEOUT
|
||||
puppetd.discover(:nodes => uids)
|
||||
last_run = puppetd.last_run_summary
|
||||
running_uids = last_run.select {|x| x.results[:data][:status] != 'stopped'}.map {|n| n.results[:sender]}
|
||||
stopped_uids = uids - running_uids
|
||||
# If stopped_uids is empty this cycle will not be called.
|
||||
stopped_uids.each do |uid|
|
||||
puppetd.discover(:nodes => [uid])
|
||||
puppetd.runonce(:puppet_debug => debug_mode_dict[uid])
|
||||
end
|
||||
uids = running_uids
|
||||
break if uids.empty?
|
||||
sleep Astute.config.PUPPET_FADE_INTERVAL
|
||||
end
|
||||
Astute.logger.debug "puppetd_runonce completed within #{Time.now.to_i - started} seconds."
|
||||
Astute.logger.debug "Following nodes have puppet hung: '#{running_uids.join(',')}'" if running_uids.any?
|
||||
running_uids
|
||||
end
|
||||
|
||||
def self.calc_nodes_status(last_run, prev_run)
|
||||
# Finished are those which are not in running state,
|
||||
# and changed their last_run time, which is changed after application of catalog,
|
||||
# at the time of updating last_run_summary file. At that particular time puppet is
|
||||
# still running, and will finish in a couple of seconds.
|
||||
# If Puppet had crashed before it got a catalog (e.g. certificate problems),
|
||||
# it didn't update last_run_summary file and switched to 'stopped' state.
|
||||
|
||||
stopped = last_run.select {|x| x.results[:data][:status] == 'stopped'}
|
||||
|
||||
# Select all finished nodes which not failed and changed last_run time.
|
||||
succeed_nodes = stopped.select { |n|
|
||||
prev_n = prev_run.find{|ps| ps.results[:sender] == n.results[:sender] }
|
||||
n.results[:data][:resources]['failed'].to_i == 0 &&
|
||||
n.results[:data][:resources]['failed_to_restart'].to_i == 0 &&
|
||||
n.results[:data][:time]['last_run'] != (prev_n && prev_n.results[:data][:time]['last_run'])
|
||||
}.map{|x| x.results[:sender] }
|
||||
|
||||
stopped_nodes = stopped.map {|x| x.results[:sender]}
|
||||
error_nodes = stopped_nodes - succeed_nodes
|
||||
|
||||
# Running are all which didn't appear in finished
|
||||
running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes
|
||||
|
||||
nodes_to_check = running_nodes + succeed_nodes + error_nodes
|
||||
unless nodes_to_check.size == last_run.size
|
||||
raise "Shoud never happen. Internal error in nodes statuses calculation. Statuses calculated for: #{nodes_to_check.inspect},"
|
||||
"nodes passed to check statuses of: #{last_run.map {|n| n.results[:sender]}}"
|
||||
end
|
||||
{'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes}
|
||||
end
|
||||
|
||||
public
|
||||
def self.deploy(ctx, nodes, retries=2, change_node_status=true)
|
||||
# TODO: can we hide retries, ignore_failure into @ctx ?
|
||||
uids = nodes.map { |n| n['uid'] }
|
||||
nodes_roles = {}
|
||||
nodes.each { |n| nodes_roles[n['uid']] = n['role'] }
|
||||
|
||||
# Keep info about retries for each node
|
||||
node_retries = {}
|
||||
uids.each {|x| node_retries.merge!({x => retries}) }
|
||||
Astute.logger.debug "Waiting for puppet to finish deployment on all nodes (timeout = #{Astute.config.PUPPET_TIMEOUT} sec)..."
|
||||
time_before = Time.now
|
||||
def self.deploy_nodes(nodes_to_check)
|
||||
Timeout::timeout(Astute.config.PUPPET_TIMEOUT) do
|
||||
puppetd = MClient.new(ctx, "puppetd", uids)
|
||||
puppetd.on_respond_timeout do |uids|
|
||||
nodes = uids.map do |uid|
|
||||
{ 'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => nodes_roles[uid] }
|
||||
end
|
||||
ctx.report_and_update_status('nodes' => nodes)
|
||||
end if change_node_status
|
||||
prev_summary = puppetd.last_run_summary
|
||||
puppetd_runonce(puppetd, uids, nodes)
|
||||
nodes_to_check = uids
|
||||
last_run = puppetd.last_run_summary
|
||||
while nodes_to_check.any?
|
||||
calc_nodes = calc_nodes_status(last_run, prev_summary)
|
||||
prev_summary = puppetd(nodes_to_check).last_run_summary
|
||||
hung_nodes = puppetd_runonce(nodes_to_check)
|
||||
|
||||
while nodes_to_check.present?
|
||||
last_run = puppetd(nodes_to_check).last_run_summary
|
||||
calc_nodes = calc_nodes_status(last_run, prev_summary, hung_nodes)
|
||||
Astute.logger.debug "Nodes statuses: #{calc_nodes.inspect}"
|
||||
|
||||
# At least we will report about successfully deployed nodes
|
||||
nodes_to_report = []
|
||||
if change_node_status
|
||||
nodes_to_report.concat(calc_nodes['succeed'].map do |uid|
|
||||
{ 'uid' => uid, 'status' => 'ready', 'role' => nodes_roles[uid] }
|
||||
end)
|
||||
end
|
||||
report_succeed = processing_succeed_nodes calc_nodes['succeed']
|
||||
report_error, nodes_to_retry = processing_error_nodes(calc_nodes['error'])
|
||||
report_running = processing_running_nodes(calc_nodes['running'])
|
||||
|
||||
# Process retries
|
||||
nodes_to_retry = []
|
||||
calc_nodes['error'].each do |uid|
|
||||
if node_retries[uid] > 0
|
||||
node_retries[uid] -= 1
|
||||
Astute.logger.debug "Puppet on node #{uid.inspect} will be restarted. "\
|
||||
"#{node_retries[uid]} retries remained."
|
||||
nodes_to_retry << uid
|
||||
else
|
||||
Astute.logger.debug "Node #{uid.inspect} has failed to deploy. There is no more retries for puppet run."
|
||||
nodes_to_report << {'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => nodes_roles[uid] } if change_node_status
|
||||
end
|
||||
end
|
||||
if nodes_to_retry.any?
|
||||
nodes_to_report = report_succeed + report_error + report_running
|
||||
@ctx.report_and_update_status('nodes' => nodes_to_report) if nodes_to_report.present?
|
||||
|
||||
if nodes_to_retry.present?
|
||||
Astute.logger.info "Retrying to run puppet for following error nodes: #{nodes_to_retry.join(',')}"
|
||||
puppetd_runonce(puppetd, nodes_to_retry, nodes)
|
||||
hung_nodes = puppetd_runonce(nodes_to_retry)
|
||||
# We need this magic with prev_summary to reflect new puppetd run statuses..
|
||||
prev_summary.delete_if { |x| nodes_to_retry.include?(x.results[:sender]) }
|
||||
prev_summary += last_run.select { |x| nodes_to_retry.include?(x.results[:sender]) }
|
||||
end
|
||||
# /end of processing retries
|
||||
|
||||
if calc_nodes['running'].any?
|
||||
begin
|
||||
# Pass nodes because logs calculation needs IP address of node, not just uid
|
||||
nodes_progress = ctx.deploy_log_parser.progress_calculate(calc_nodes['running'], nodes)
|
||||
if nodes_progress.any?
|
||||
Astute.logger.debug "Got progress for nodes: #{nodes_progress.inspect}"
|
||||
# Nodes with progress are running, so they are not included in nodes_to_report yet
|
||||
nodes_progress.map! { |x| x.merge!('status' => 'deploying', 'role' => nodes_roles[x['uid']]) }
|
||||
nodes_to_report += nodes_progress
|
||||
end
|
||||
rescue => e
|
||||
Astute.logger.warn "Some error occurred when parse logs for nodes progress: #{e.message}, "\
|
||||
"trace: #{e.format_backtrace}"
|
||||
end
|
||||
end
|
||||
|
||||
ctx.report_and_update_status('nodes' => nodes_to_report) if nodes_to_report.any?
|
||||
|
||||
# we will iterate only over running nodes and those that we restart deployment for
|
||||
nodes_to_check = calc_nodes['running'] + nodes_to_retry
|
||||
break if nodes_to_check.empty?
|
||||
|
||||
break if nodes_to_check.empty?
|
||||
sleep Astute.config.PUPPET_DEPLOY_INTERVAL
|
||||
puppetd.discover(:nodes => nodes_to_check)
|
||||
last_run = puppetd.last_run_summary
|
||||
end
|
||||
end
|
||||
time_spent = Time.now - time_before
|
||||
Astute.logger.info "#{ctx.task_id}: Spent #{time_spent} seconds on puppet run "\
|
||||
"for following nodes(uids): #{nodes.map {|n| n['uid']}.join(',')}"
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
|
|
@ -33,6 +33,7 @@ Dir[File.join(File.dirname(__FILE__), 'unit/fixtures/*.rb')].each { |file| requi
|
|||
# resetting time to sleep significantly increases tests speed
|
||||
Astute.config.PUPPET_DEPLOY_INTERVAL = 0
|
||||
Astute.config.PUPPET_FADE_INTERVAL = 0
|
||||
Astute.config.PUPPET_FADE_TIMEOUT = 1
|
||||
Astute.config.MC_RETRY_INTERVAL = 0
|
||||
Astute.config.PROVISIONING_TIMEOUT = 0
|
||||
Astute.config.REBOOT_TIMEOUT = 0
|
||||
|
|
|
@ -60,7 +60,7 @@ describe "NailyFact DeploymentEngine" do
|
|||
|
||||
it "it should not raise an exception if deployment mode is unknown" do
|
||||
deploy_engine.expects(:upload_facts).times(deploy_data.size)
|
||||
Astute::PuppetdDeployer.stubs(:deploy).with(ctx, deploy_data, instance_of(Fixnum), true).once
|
||||
Astute::PuppetdDeployer.stubs(:deploy).with(ctx, deploy_data, instance_of(Fixnum)).once
|
||||
expect {deploy_engine.deploy(deploy_data)}.to_not raise_exception
|
||||
end
|
||||
end
|
||||
|
@ -74,8 +74,8 @@ describe "NailyFact DeploymentEngine" do
|
|||
deploy_engine.expects(:upload_facts).times(deploy_data.size)
|
||||
|
||||
# we got two calls, one for controller (high priority), and another for all computes (same low priority)
|
||||
Astute::PuppetdDeployer.expects(:deploy).with(ctx, controller_nodes, instance_of(Fixnum), true).once
|
||||
Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum), true).once
|
||||
Astute::PuppetdDeployer.expects(:deploy).with(ctx, controller_nodes, instance_of(Fixnum)).once
|
||||
Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum)).once
|
||||
|
||||
expect {deploy_engine.deploy(deploy_data)}.to_not raise_exception
|
||||
end
|
||||
|
@ -122,12 +122,12 @@ describe "NailyFact DeploymentEngine" do
|
|||
deploy_engine.expects(:upload_facts).at_least_once
|
||||
|
||||
primary_controller = deploy_data.find { |n| n['role'] == 'primary-controller' }
|
||||
Astute::PuppetdDeployer.expects(:deploy).with(ctx, [primary_controller], 2, true).once
|
||||
Astute::PuppetdDeployer.expects(:deploy).with(ctx, [primary_controller], 2).once
|
||||
|
||||
controller_nodes.each do |n|
|
||||
Astute::PuppetdDeployer.expects(:deploy).with(ctx, [n], 2, true).once
|
||||
Astute::PuppetdDeployer.expects(:deploy).with(ctx, [n], 2).once
|
||||
end
|
||||
Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum), true).once
|
||||
Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum)).once
|
||||
|
||||
deploy_engine.deploy(deploy_data)
|
||||
end
|
||||
|
|
|
@ -22,15 +22,15 @@ describe "Puppetd" do
|
|||
|
||||
context "PuppetdDeployer" do
|
||||
let(:reporter) { mock('reporter') }
|
||||
|
||||
|
||||
let(:ctx) do
|
||||
Context.new("task id", ProxyReporter::DeploymentProxyReporter.new(reporter), Astute::LogParser::NoParsing.new)
|
||||
end
|
||||
|
||||
|
||||
let(:nodes) { [{'uid' => '1', 'role' => 'compute'}] }
|
||||
|
||||
|
||||
let(:rpcclient) { mock_rpcclient(nodes) }
|
||||
|
||||
|
||||
let(:last_run_result) do
|
||||
{
|
||||
:statuscode =>0,
|
||||
|
@ -48,113 +48,94 @@ describe "Puppetd" do
|
|||
:sender=>"1"
|
||||
}
|
||||
end
|
||||
|
||||
|
||||
let(:last_run_result_running) do
|
||||
res = deep_copy(last_run_result)
|
||||
res[:data].merge!(:status => 'running', :running => 1, :stopped => 0)
|
||||
res
|
||||
end
|
||||
|
||||
|
||||
let(:last_run_result_fail) do
|
||||
res = deep_copy(last_run_result_running)
|
||||
res[:data].merge!(:runtime => 1358426000,
|
||||
res[:data].merge!(:runtime => 1358426000,
|
||||
:time => {"last_run" => 1358426000},
|
||||
:resources => {"failed" => 1}
|
||||
)
|
||||
res
|
||||
end
|
||||
|
||||
|
||||
let(:last_run_failed) do
|
||||
res = deep_copy(last_run_result_fail)
|
||||
res[:data].merge!(:status => 'stopped', :stopped => 1, :running => 0)
|
||||
res
|
||||
end
|
||||
|
||||
|
||||
let(:last_run_result_finished) do
|
||||
res = deep_copy last_run_result
|
||||
res[:data][:time]['last_run'] = 1358428000
|
||||
res[:data][:status] = 'stopped'
|
||||
res
|
||||
end
|
||||
|
||||
|
||||
context 'reportet behavior' do
|
||||
let(:last_run_result) do
|
||||
{
|
||||
:data=> {
|
||||
:time=>{"last_run"=>1358425701},
|
||||
:status => "running",
|
||||
:resources => {'failed' => 0},
|
||||
:running => 1,
|
||||
:idling => 0
|
||||
},
|
||||
:sender=>"1"
|
||||
}
|
||||
end
|
||||
|
||||
|
||||
let(:prepare_mcollective_env) do
|
||||
last_run_result_new = deep_copy last_run_result
|
||||
last_run_result_new[:data][:time]['last_run'] = 1358426000
|
||||
|
||||
|
||||
rpcclient_new_res = mock_mc_result(last_run_result_new)
|
||||
rpcclient_finished_res = mock_mc_result(last_run_result_finished)
|
||||
rpcclient_valid_result = mock_mc_result(last_run_result)
|
||||
|
||||
rpcclient.stubs(:last_run_summary).returns([rpcclient_valid_result]).then.
|
||||
returns([rpcclient_valid_result]).then.
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([rpcclient_new_res]).then.
|
||||
returns([rpcclient_finished_res])
|
||||
|
||||
|
||||
rpcclient
|
||||
end
|
||||
|
||||
it "reports ready status for node if puppet deploy finished successfully" do
|
||||
prepare_mcollective_env
|
||||
|
||||
|
||||
reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}])
|
||||
rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)])
|
||||
|
||||
|
||||
Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0)
|
||||
end
|
||||
|
||||
it "doesn't report ready status for node if change_node_status disabled" do
|
||||
prepare_mcollective_env
|
||||
|
||||
reporter.expects(:report).never
|
||||
rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)])
|
||||
|
||||
Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0, change_node_status=false)
|
||||
end
|
||||
|
||||
context 'multiroles behavior' do
|
||||
let(:nodes) { [{'uid' => '1', 'role' => 'compute'}] }
|
||||
let(:nodes_multiroles) { [{'uid' => '1', 'role' => 'controller'}] }
|
||||
before(:each) do
|
||||
@ctx = Context.new("task id",
|
||||
ProxyReporter::DeploymentProxyReporter.new(reporter, nodes + nodes_multiroles),
|
||||
@ctx = Context.new("task id",
|
||||
ProxyReporter::DeploymentProxyReporter.new(reporter, nodes + nodes_multiroles),
|
||||
Astute::LogParser::NoParsing.new
|
||||
)
|
||||
end
|
||||
|
||||
|
||||
it "it should not send final status before all roles of node will deploy" do
|
||||
prepare_mcollective_env
|
||||
|
||||
reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'deploying', 'progress' => 50, 'role' => 'compute'}])
|
||||
|
||||
reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'deploying', 'progress' => 50, 'role' => 'compute'}])
|
||||
rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)])
|
||||
|
||||
|
||||
Astute::PuppetdDeployer.deploy(@ctx, nodes, retries=0)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
context "puppet state transitions" do
|
||||
|
||||
|
||||
let(:last_run_result_idle_pre) do
|
||||
res = deep_copy(last_run_result)
|
||||
res[:data].merge!(:status => 'idling', :idling => 1, :stopped => 0)
|
||||
res
|
||||
end
|
||||
|
||||
|
||||
let(:last_run_result_idle_post) do
|
||||
res = deep_copy(last_run_result_fail)
|
||||
res[:data].merge!(:status => 'idling', :idling => 1, :running => 0)
|
||||
|
@ -172,11 +153,11 @@ describe "Puppetd" do
|
|||
returns([ mock_mc_result(last_run_result_fail) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_fail) ]).then.
|
||||
returns([ mock_mc_result(last_run_failed) ])
|
||||
|
||||
|
||||
reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
|
||||
rpcclient.expects(:runonce).once.
|
||||
returns([ mock_mc_result(last_run_result) ])
|
||||
|
||||
|
||||
Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
|
||||
end
|
||||
|
||||
|
@ -188,11 +169,11 @@ describe "Puppetd" do
|
|||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_fail) ]).then.
|
||||
returns([ mock_mc_result(last_run_failed) ])
|
||||
|
||||
|
||||
reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
|
||||
rpcclient.expects(:runonce).once.
|
||||
returns([ mock_mc_result(last_run_result) ])
|
||||
|
||||
|
||||
Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
|
||||
end
|
||||
|
||||
|
@ -202,26 +183,26 @@ describe "Puppetd" do
|
|||
returns([ mock_mc_result(last_run_result) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_failed) ])
|
||||
|
||||
|
||||
reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
|
||||
rpcclient.expects(:runonce).once.
|
||||
returns([ mock_mc_result(last_run_result) ])
|
||||
|
||||
|
||||
Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
|
||||
end
|
||||
|
||||
it "publishes error status for node if puppet failed (a cycle w/ one running state only)" do
|
||||
it "publishes error status for node if puppet failed (a cycle with one running state only)" do
|
||||
rpcclient.stubs(:last_run_summary).times(5).
|
||||
returns([ mock_mc_result(last_run_result) ]).then.
|
||||
returns([ mock_mc_result(last_run_result) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_fail) ]).then.
|
||||
returns([ mock_mc_result(last_run_failed) ])
|
||||
|
||||
|
||||
reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
|
||||
rpcclient.expects(:runonce).once.
|
||||
returns([ mock_mc_result(last_run_result) ])
|
||||
|
||||
|
||||
Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
|
||||
end
|
||||
|
||||
|
@ -230,30 +211,55 @@ describe "Puppetd" do
|
|||
returns([ mock_mc_result(last_run_result) ]).then.
|
||||
returns([ mock_mc_result(last_run_result) ]).then.
|
||||
returns([ mock_mc_result(last_run_failed) ])
|
||||
|
||||
|
||||
reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
|
||||
rpcclient.expects(:runonce).once.
|
||||
returns([ mock_mc_result(last_run_result) ])
|
||||
|
||||
|
||||
Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
|
||||
end
|
||||
end
|
||||
|
||||
it "doesn't publish error status for node if change_node_status disabled" do
|
||||
reporter.expects(:report).never
|
||||
context '' do
|
||||
around(:each) do |example|
|
||||
old_value = Astute.config.PUPPET_FADE_INTERVAL
|
||||
example.run
|
||||
Astute.config.PUPPET_FADE_INTERVAL = old_value
|
||||
end
|
||||
|
||||
rpcclient_valid_result = mock_mc_result(last_run_result)
|
||||
rpcclient_new_res = mock_mc_result(last_run_result_fail)
|
||||
rpcclient_finished_res = mock_mc_result(last_run_failed)
|
||||
before(:each) do
|
||||
Astute.config.PUPPET_FADE_INTERVAL = 1
|
||||
end
|
||||
|
||||
rpcclient.stubs(:last_run_summary).returns([rpcclient_valid_result]).then.
|
||||
returns([rpcclient_valid_result]).then.
|
||||
returns([rpcclient_new_res]).then.
|
||||
returns([rpcclient_finished_res])
|
||||
rpcclient.expects(:runonce).at_least_once.returns([rpcclient_valid_result])
|
||||
it "publishes error status for node if puppet running alien task (attempts been exhausted)" do
|
||||
rpcclient.stubs(:last_run_summary).at_least(3).
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_running) ])
|
||||
|
||||
reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
|
||||
rpcclient.expects(:runonce).never
|
||||
|
||||
Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
|
||||
end
|
||||
|
||||
it "ignore exit code of puppet running of alien task (waited for alien task stop and launched own)" do
|
||||
rpcclient.stubs(:last_run_summary).at_least(3).
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_failed) ]).then.
|
||||
returns([ mock_mc_result(last_run_failed) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_running) ]).then.
|
||||
returns([ mock_mc_result(last_run_result_finished) ])
|
||||
|
||||
rpcclient.expects(:runonce).at_least(1).returns([ mock_mc_result(last_run_result) ])
|
||||
reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}])
|
||||
|
||||
Astute::PuppetdDeployer.deploy(ctx, nodes, 1)
|
||||
end
|
||||
end
|
||||
|
||||
MClient.any_instance.stubs(:rpcclient).returns(rpcclient)
|
||||
Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0, change_node_status=false)
|
||||
end
|
||||
|
||||
it "retries to run puppet if it fails" do
|
||||
|
@ -268,7 +274,7 @@ describe "Puppetd" do
|
|||
returns([rpcclient_failed]).then.
|
||||
returns([rpcclient_fail]).then.
|
||||
returns([rpcclient_succeed])
|
||||
|
||||
|
||||
reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}])
|
||||
rpcclient.expects(:runonce).at_least_once.returns([rpcclient_valid_result])
|
||||
|
||||
|
|
Loading…
Reference in New Issue