Merge "Strong puppet hung check"

Jenkins 2013-12-13 19:07:51 +00:00 committed by Gerrit Code Review
commit 75aa0877cb
7 changed files with 245 additions and 211 deletions

View File

@@ -19,4 +19,4 @@ PUPPET_FADE_TIMEOUT: 60
# PUPPET_FADE_INTERVAL is used in puppetd.rb file.
# Retry every PUPPET_FADE_INTERVAL seconds to check puppet state if it was
# in 'running' state.
- PUPPET_FADE_INTERVAL: 1
+ PUPPET_FADE_INTERVAL: 5

View File

@ -58,7 +58,7 @@ module Astute
conf[:PUPPET_FADE_TIMEOUT] = 60 # how long it can take for puppet to exit after dumping to last_run_summary conf[:PUPPET_FADE_TIMEOUT] = 60 # how long it can take for puppet to exit after dumping to last_run_summary
conf[:MC_RETRIES] = 5 # MClient tries to call mcagent before failure conf[:MC_RETRIES] = 5 # MClient tries to call mcagent before failure
conf[:MC_RETRY_INTERVAL] = 1 # MClient sleeps for ## sec between retries conf[:MC_RETRY_INTERVAL] = 1 # MClient sleeps for ## sec between retries
conf[:PUPPET_FADE_INTERVAL] = 1 # retry every ## seconds to check puppet state if it was running conf[:PUPPET_FADE_INTERVAL] = 5 # retry every ## seconds to check puppet state if it was running
conf[:PROVISIONING_TIMEOUT] = 90 * 60 # timeout for booting target OS in provision conf[:PROVISIONING_TIMEOUT] = 90 * 60 # timeout for booting target OS in provision
conf[:REBOOT_TIMEOUT] = 120 # how long it can take for node to reboot conf[:REBOOT_TIMEOUT] = 120 # how long it can take for node to reboot
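The two settings above work as a pair: PUPPET_FADE_TIMEOUT caps how long Astute waits for a puppet agent to leave the running state, while PUPPET_FADE_INTERVAL (raised here from 1 to 5 seconds) is the pause between polls. A minimal, self-contained Ruby sketch of that polling pattern; puppet_status is an invented stand-in for the real MCollective last_run_summary query:

    PUPPET_FADE_TIMEOUT  = 60 # give up waiting after this many seconds
    PUPPET_FADE_INTERVAL = 5  # pause between polls

    # Stand-in for the real status query; always answers 'stopped' here so the
    # example finishes on the first pass.
    def puppet_status(uid)
      'stopped'
    end

    # Poll until every uid has left the running/idling state or the fade
    # timeout expires; whatever is still busy at the end is treated as hung.
    def wait_for_puppet_to_stop(uids)
      started = Time.now.to_i
      while Time.now.to_i - started < PUPPET_FADE_TIMEOUT
        uids = uids.select { |uid| ['running', 'idling'].include?(puppet_status(uid)) }
        break if uids.empty?
        sleep PUPPET_FADE_INTERVAL
      end
      uids
    end

    puts "hung nodes: #{wait_for_puppet_to_stop(['1', '2']).inspect}"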

View File

@@ -16,7 +16,7 @@ require 'yaml'
class Astute::DeploymentEngine::NailyFact < Astute::DeploymentEngine
- def deploy_piece(nodes, retries=2, change_node_status=true)
+ def deploy_piece(nodes, retries=2)
    return false unless validate_nodes(nodes)
    @ctx.reporter.report(nodes_status(nodes, 'deploying', {'progress' => 0}))
@@ -30,7 +30,7 @@ class Astute::DeploymentEngine::NailyFact < Astute::DeploymentEngine
    nodes.each { |node| upload_facts(node) }
    Astute.logger.info "#{@ctx.task_id}: Required attrs/metadata passed via facts extension. Starting deployment."
-   Astute::PuppetdDeployer.deploy(@ctx, nodes, retries, change_node_status)
+   Astute::PuppetdDeployer.deploy(@ctx, nodes, retries)
    nodes_roles = nodes.map { |n| {n['uid'] => n['role']} }
    Astute.logger.info "#{@ctx.task_id}: Finished deployment of nodes => roles: #{nodes_roles.inspect}"
  end
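The call site above shows the ordering in deploy_piece: facts are uploaded to every node first, then the whole batch is handed to PuppetdDeployer with only a retry count, since the change_node_status flag is gone from the interface. A rough, self-contained Ruby sketch of that ordering, with invented lambdas standing in for the real fact upload and puppet run:

    nodes = [{'uid' => '1', 'role' => 'primary-controller'}, {'uid' => '2', 'role' => 'compute'}]

    # Stand-ins for the real calls, just to show the order of operations.
    upload_facts = ->(node) { puts "facts uploaded to node #{node['uid']} (#{node['role']})" }
    run_puppet   = ->(batch, retries) { puts "puppet run on #{batch.map { |n| n['uid'] }.join(',')}, #{retries} retries allowed" }

    nodes.each { |node| upload_facts.call(node) } # facts go out per node first
    run_puppet.call(nodes, 2)                     # then one puppet run covers the whole batch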

View File

@@ -18,6 +18,147 @@ require 'timeout'
module Astute
module PuppetdDeployer
def self.deploy(ctx, nodes, retries=2)
@ctx = ctx
@nodes_roles = nodes.inject({}) { |h, n| h.merge({n['uid'] => n['role']}) }
@node_retries = nodes.inject({}) { |h, n| h.merge({n['uid'] => retries}) }
@nodes = nodes
Astute.logger.debug "Waiting for puppet to finish deployment on all
nodes (timeout = #{Astute.config.PUPPET_TIMEOUT} sec)..."
time_before = Time.now
deploy_nodes(@nodes.map { |n| n['uid'] })
time_spent = Time.now - time_before
Astute.logger.info "#{@ctx.task_id}: Spent #{time_spent} seconds on puppet run "\
"for following nodes(uids): #{@nodes.map {|n| n['uid']}.join(',')}"
end
private
# Runs puppetd.runonce only if puppet is stopped on the host at the time
# If it isn't stopped, we wait a bit and try again.
# Returns list of nodes uids which appear to be with hung puppet.
def self.puppetd_runonce(uids)
started = Time.now.to_i
while Time.now.to_i - started < Astute.config.PUPPET_FADE_TIMEOUT
running_uids = puppetd(uids).last_run_summary.select { |x|
['running', 'idling'].include?(x.results[:data][:status])
}.map { |n| n.results[:sender] }
stopped_uids = uids - running_uids
@nodes.select { |n| stopped_uids.include? n['uid'] }
.group_by { |n| n['debug'] }
.each do |debug, stop_nodes|
puppetd(stop_nodes.map { |n| n['uid'] }).runonce(:puppet_debug => debug)
end
break if running_uids.empty?
uids = running_uids
sleep Astute.config.PUPPET_FADE_INTERVAL
end
Astute.logger.debug "puppetd_runonce completed within #{Time.now.to_i - started} seconds."
Astute.logger.warn "Following nodes have puppet hung: '#{running_uids.join(',')}'" if running_uids.present?
running_uids
end
def self.calc_nodes_status(last_run, prev_run, hung_nodes=[])
# Finished are those which are not in running state,
# and changed their last_run time, which is changed after application of catalog,
# at the time of updating last_run_summary file. At that particular time puppet is
# still running, and will finish in a couple of seconds.
# If Puppet had crashed before it got a catalog (e.g. certificate problems),
# it didn't update last_run_summary file and switched to 'stopped' state.
stopped = last_run.select { |x| ['stopped', 'disabled'].include? x.results[:data][:status] }
# Select all finished nodes which not failed and changed last_run time.
succeed_nodes = stopped.select { |n|
prev_n = prev_run.find{|ps| ps.results[:sender] == n.results[:sender] }
n.results[:data][:status] == 'stopped' &&
n.results[:data][:resources]['failed'].to_i == 0 &&
n.results[:data][:resources]['failed_to_restart'].to_i == 0 &&
n.results[:data][:time]['last_run'] != (prev_n && prev_n.results[:data][:time]['last_run'])
}.map{|x| x.results[:sender] }
stopped_nodes = stopped.map { |x| x.results[:sender] }
error_nodes = stopped_nodes - succeed_nodes
running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes
# Hunged nodes can change state at this moment(success, error or still run),
# but we should to turn it on only in error_nodes
succeed_nodes -= hung_nodes
error_nodes = (error_nodes + hung_nodes).uniq
running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes - hung_nodes
nodes_to_check = running_nodes + succeed_nodes + error_nodes
unless nodes_to_check.size == last_run.size
raise "Should never happen. Internal error in nodes statuses calculation. Statuses calculated for: #{nodes_to_check.inspect},"
"nodes passed to check statuses of: #{last_run.map {|n| n.results[:sender]}}"
end
{'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes}
end
def self.puppetd(uids)
puppetd = MClient.new(@ctx, "puppetd", Array(uids))
puppetd.on_respond_timeout do |uids|
nodes = uids.map do |uid|
{ 'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => @nodes_roles[uid] }
end
@ctx.report_and_update_status('nodes' => nodes)
end
puppetd
end
def self.processing_error_nodes(error_nodes)
nodes_to_report = []
nodes_to_retry = []
error_nodes.each do |uid|
if @node_retries[uid] > 0
@node_retries[uid] -= 1
Astute.logger.debug "Puppet on node #{uid.inspect} will be restarted. "\
"#{@node_retries[uid]} retries remained."
nodes_to_retry << uid
else
Astute.logger.debug "Node #{uid.inspect} has failed to deploy. There is no more retries for puppet run."
nodes_to_report << {'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => @nodes_roles[uid] }
end
end
return nodes_to_report, nodes_to_retry
end
def self.processing_running_nodes(running_nodes)
nodes_to_report = []
if running_nodes.present?
begin
# Pass nodes because logs calculation needs IP address of node, not just uid
nodes_progress = @ctx.deploy_log_parser.progress_calculate(running_nodes, @nodes)
if nodes_progress.present?
Astute.logger.debug "Got progress for nodes: #{nodes_progress.inspect}"
# Nodes with progress are running, so they are not included in nodes_to_report yet
nodes_progress.map! { |x| x.merge!('status' => 'deploying', 'role' => @nodes_roles[x['uid']]) }
nodes_to_report = nodes_progress
end
rescue => e
Astute.logger.warn "Some error occurred when parse logs for nodes progress: #{e.message}, "\
"trace: #{e.format_backtrace}"
end
end
nodes_to_report
end
def self.processing_succeed_nodes(succeed_nodes)
succeed_nodes.map do |uid|
{ 'uid' => uid, 'status' => 'ready', 'role' => @nodes_roles[uid] }
end
end
# As I (Andrey Danin) understand, Puppet agent goes through these steps:
# * Puppetd has 'stopped' state.
# * We run it as a run_once, and puppetd goes to 'idling' state - it trying to
@@ -31,153 +172,39 @@ module Astute
# * After puppetd finished all internal jobs connected with finished catalog,
# it goes to 'idling' state.
# * After a short time it goes to 'stopped' state because we ran it as a run_once.
-    private
-    # Runs puppetd.runonce only if puppet is stopped on the host at the time
-    # If it isn't stopped, we wait a bit and try again.
-    # Returns list of nodes uids which appear to be with hung puppet.
-    def self.puppetd_runonce(puppetd, uids, nodes)
-      debug_mode_dict = nodes.inject({}) {|dict, node| dict[node['uid']] = node['debug']; dict}
-      started = Time.now.to_i
-      while Time.now.to_i - started < Astute.config.PUPPET_FADE_TIMEOUT
-        puppetd.discover(:nodes => uids)
-        last_run = puppetd.last_run_summary
-        running_uids = last_run.select {|x| x.results[:data][:status] != 'stopped'}.map {|n| n.results[:sender]}
-        stopped_uids = uids - running_uids
-        # If stopped_uids is empty this cycle will not be called.
-        stopped_uids.each do |uid|
-          puppetd.discover(:nodes => [uid])
-          puppetd.runonce(:puppet_debug => debug_mode_dict[uid])
-        end
-        uids = running_uids
-        break if uids.empty?
-        sleep Astute.config.PUPPET_FADE_INTERVAL
-      end
-      Astute.logger.debug "puppetd_runonce completed within #{Time.now.to_i - started} seconds."
-      Astute.logger.debug "Following nodes have puppet hung: '#{running_uids.join(',')}'" if running_uids.any?
-      running_uids
-    end
-    def self.calc_nodes_status(last_run, prev_run)
-      # Finished are those which are not in running state,
-      # and changed their last_run time, which is changed after application of catalog,
-      # at the time of updating last_run_summary file. At that particular time puppet is
-      # still running, and will finish in a couple of seconds.
-      # If Puppet had crashed before it got a catalog (e.g. certificate problems),
-      # it didn't update last_run_summary file and switched to 'stopped' state.
-      stopped = last_run.select {|x| x.results[:data][:status] == 'stopped'}
-      # Select all finished nodes which not failed and changed last_run time.
-      succeed_nodes = stopped.select { |n|
-        prev_n = prev_run.find{|ps| ps.results[:sender] == n.results[:sender] }
-        n.results[:data][:resources]['failed'].to_i == 0 &&
-        n.results[:data][:resources]['failed_to_restart'].to_i == 0 &&
-        n.results[:data][:time]['last_run'] != (prev_n && prev_n.results[:data][:time]['last_run'])
-      }.map{|x| x.results[:sender] }
-      stopped_nodes = stopped.map {|x| x.results[:sender]}
-      error_nodes = stopped_nodes - succeed_nodes
-      # Running are all which didn't appear in finished
-      running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes
-      nodes_to_check = running_nodes + succeed_nodes + error_nodes
-      unless nodes_to_check.size == last_run.size
-        raise "Shoud never happen. Internal error in nodes statuses calculation. Statuses calculated for: #{nodes_to_check.inspect},"
-              "nodes passed to check statuses of: #{last_run.map {|n| n.results[:sender]}}"
-      end
-      {'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes}
-    end
-    public
-    def self.deploy(ctx, nodes, retries=2, change_node_status=true)
-      # TODO: can we hide retries, ignore_failure into @ctx ?
-      uids = nodes.map { |n| n['uid'] }
-      nodes_roles = {}
-      nodes.each { |n| nodes_roles[n['uid']] = n['role'] }
-      # Keep info about retries for each node
-      node_retries = {}
-      uids.each {|x| node_retries.merge!({x => retries}) }
-      Astute.logger.debug "Waiting for puppet to finish deployment on all nodes (timeout = #{Astute.config.PUPPET_TIMEOUT} sec)..."
-      time_before = Time.now
-      Timeout::timeout(Astute.config.PUPPET_TIMEOUT) do
-        puppetd = MClient.new(ctx, "puppetd", uids)
-        puppetd.on_respond_timeout do |uids|
-          nodes = uids.map do |uid|
-            { 'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => nodes_roles[uid] }
-          end
-          ctx.report_and_update_status('nodes' => nodes)
-        end
-        prev_summary = puppetd.last_run_summary
-        puppetd_runonce(puppetd, uids, nodes)
-        nodes_to_check = uids
-        last_run = puppetd.last_run_summary
-        while nodes_to_check.any?
-          calc_nodes = calc_nodes_status(last_run, prev_summary)
-          Astute.logger.debug "Nodes statuses: #{calc_nodes.inspect}"
-          # At least we will report about successfully deployed nodes
-          nodes_to_report = []
-          if change_node_status
-            nodes_to_report.concat(calc_nodes['succeed'].map do |uid|
-              { 'uid' => uid, 'status' => 'ready', 'role' => nodes_roles[uid] }
-            end)
-          end
-          # Process retries
-          nodes_to_retry = []
-          calc_nodes['error'].each do |uid|
-            if node_retries[uid] > 0
-              node_retries[uid] -= 1
-              Astute.logger.debug "Puppet on node #{uid.inspect} will be restarted. "\
-                                  "#{node_retries[uid]} retries remained."
-              nodes_to_retry << uid
-            else
-              Astute.logger.debug "Node #{uid.inspect} has failed to deploy. There is no more retries for puppet run."
-              nodes_to_report << {'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => nodes_roles[uid] } if change_node_status
-            end
-          end
-          if nodes_to_retry.any?
-            Astute.logger.info "Retrying to run puppet for following error nodes: #{nodes_to_retry.join(',')}"
-            puppetd_runonce(puppetd, nodes_to_retry, nodes)
-            # We need this magic with prev_summary to reflect new puppetd run statuses..
-            prev_summary.delete_if { |x| nodes_to_retry.include?(x.results[:sender]) }
-            prev_summary += last_run.select { |x| nodes_to_retry.include?(x.results[:sender]) }
-          end
-          # /end of processing retries
-          if calc_nodes['running'].any?
-            begin
-              # Pass nodes because logs calculation needs IP address of node, not just uid
-              nodes_progress = ctx.deploy_log_parser.progress_calculate(calc_nodes['running'], nodes)
-              if nodes_progress.any?
-                Astute.logger.debug "Got progress for nodes: #{nodes_progress.inspect}"
-                # Nodes with progress are running, so they are not included in nodes_to_report yet
-                nodes_progress.map! { |x| x.merge!('status' => 'deploying', 'role' => nodes_roles[x['uid']]) }
-                nodes_to_report += nodes_progress
-              end
-            rescue => e
-              Astute.logger.warn "Some error occurred when parse logs for nodes progress: #{e.message}, "\
-                                 "trace: #{e.format_backtrace}"
-            end
-          end
-          ctx.report_and_update_status('nodes' => nodes_to_report) if nodes_to_report.any?
-          # we will iterate only over running nodes and those that we restart deployment for
-          nodes_to_check = calc_nodes['running'] + nodes_to_retry
-          break if nodes_to_check.empty?
-          sleep Astute.config.PUPPET_DEPLOY_INTERVAL
-          puppetd.discover(:nodes => nodes_to_check)
-          last_run = puppetd.last_run_summary
-        end
-      end
-      time_spent = Time.now - time_before
-      Astute.logger.info "#{ctx.task_id}: Spent #{time_spent} seconds on puppet run "\
-                         "for following nodes(uids): #{nodes.map {|n| n['uid']}.join(',')}"
-    end
+    def self.deploy_nodes(nodes_to_check)
+      Timeout::timeout(Astute.config.PUPPET_TIMEOUT) do
+        prev_summary = puppetd(nodes_to_check).last_run_summary
+        hung_nodes = puppetd_runonce(nodes_to_check)
+        while nodes_to_check.present?
+          last_run = puppetd(nodes_to_check).last_run_summary
+          calc_nodes = calc_nodes_status(last_run, prev_summary, hung_nodes)
+          Astute.logger.debug "Nodes statuses: #{calc_nodes.inspect}"
+          report_succeed = processing_succeed_nodes calc_nodes['succeed']
+          report_error, nodes_to_retry = processing_error_nodes(calc_nodes['error'])
+          report_running = processing_running_nodes(calc_nodes['running'])
+          nodes_to_report = report_succeed + report_error + report_running
+          @ctx.report_and_update_status('nodes' => nodes_to_report) if nodes_to_report.present?
+          if nodes_to_retry.present?
+            Astute.logger.info "Retrying to run puppet for following error nodes: #{nodes_to_retry.join(',')}"
+            hung_nodes = puppetd_runonce(nodes_to_retry)
+            # We need this magic with prev_summary to reflect new puppetd run statuses..
+            prev_summary.delete_if { |x| nodes_to_retry.include?(x.results[:sender]) }
+            prev_summary += last_run.select { |x| nodes_to_retry.include?(x.results[:sender]) }
+          end
+          # we will iterate only over running nodes and those that we restart deployment for
+          nodes_to_check = calc_nodes['running'] + nodes_to_retry
+          break if nodes_to_check.empty?
+          sleep Astute.config.PUPPET_DEPLOY_INTERVAL
+        end
+      end
+    end
  end
end
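Taken together, the new methods above are the strong hung check itself: puppetd_runonce returns whichever uids never left the running or idling state within PUPPET_FADE_TIMEOUT, and calc_nodes_status then forces those uids into the error bucket even when their run summary would otherwise look clean. A stripped-down, self-contained Ruby sketch of that classification step, using plain hashes in place of the MCollective result objects and omitting the prev_run timestamp comparison:

    # Each entry mimics the parts of last_run_summary the classifier looks at.
    last_run = [
      { 'uid' => '1', 'status' => 'stopped', 'failed' => 0 }, # finished cleanly
      { 'uid' => '2', 'status' => 'stopped', 'failed' => 1 }, # finished with failures
      { 'uid' => '3', 'status' => 'running', 'failed' => 0 }  # still applying a catalog
    ]

    # hung_uids come from the runonce phase: agents that never stopped in time.
    def classify(last_run, hung_uids = [])
      stopped = last_run.select { |r| r['status'] == 'stopped' }
      succeed = stopped.select { |r| r['failed'].to_i == 0 }.map { |r| r['uid'] }
      error   = stopped.map { |r| r['uid'] } - succeed
      running = last_run.map { |r| r['uid'] } - stopped.map { |r| r['uid'] }

      # The hung-check twist: a hung node can only ever count as an error.
      succeed -= hung_uids
      error    = (error + hung_uids).uniq
      running -= hung_uids

      { 'succeed' => succeed, 'error' => error, 'running' => running }
    end

    p classify(last_run)        # {"succeed"=>["1"], "error"=>["2"], "running"=>["3"]}
    p classify(last_run, ['3']) # node 3 is hung, so it moves from running to error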

View File

@@ -33,6 +33,7 @@ Dir[File.join(File.dirname(__FILE__), 'unit/fixtures/*.rb')].each { |file| requi
# resetting time to sleep significantly increases tests speed
Astute.config.PUPPET_DEPLOY_INTERVAL = 0
Astute.config.PUPPET_FADE_INTERVAL = 0
+ Astute.config.PUPPET_FADE_TIMEOUT = 1
Astute.config.MC_RETRY_INTERVAL = 0
Astute.config.PROVISIONING_TIMEOUT = 0
Astute.config.REBOOT_TIMEOUT = 0

View File

@@ -60,7 +60,7 @@ describe "NailyFact DeploymentEngine" do
    it "it should not raise an exception if deployment mode is unknown" do
      deploy_engine.expects(:upload_facts).times(deploy_data.size)
-      Astute::PuppetdDeployer.stubs(:deploy).with(ctx, deploy_data, instance_of(Fixnum), true).once
+      Astute::PuppetdDeployer.stubs(:deploy).with(ctx, deploy_data, instance_of(Fixnum)).once
      expect {deploy_engine.deploy(deploy_data)}.to_not raise_exception
    end
  end
@@ -74,8 +74,8 @@ describe "NailyFact DeploymentEngine" do
      deploy_engine.expects(:upload_facts).times(deploy_data.size)
      # we got two calls, one for controller (high priority), and another for all computes (same low priority)
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, controller_nodes, instance_of(Fixnum), true).once
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum), true).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, controller_nodes, instance_of(Fixnum)).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum)).once
      expect {deploy_engine.deploy(deploy_data)}.to_not raise_exception
    end
@@ -122,12 +122,12 @@ describe "NailyFact DeploymentEngine" do
      deploy_engine.expects(:upload_facts).at_least_once
      primary_controller = deploy_data.find { |n| n['role'] == 'primary-controller' }
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, [primary_controller], 2, true).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, [primary_controller], 2).once
      controller_nodes.each do |n|
-        Astute::PuppetdDeployer.expects(:deploy).with(ctx, [n], 2, true).once
+        Astute::PuppetdDeployer.expects(:deploy).with(ctx, [n], 2).once
      end
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum), true).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum)).once
      deploy_engine.deploy(deploy_data)
    end

View File

@@ -22,15 +22,15 @@ describe "Puppetd" do
  context "PuppetdDeployer" do
    let(:reporter) { mock('reporter') }
    let(:ctx) do
      Context.new("task id", ProxyReporter::DeploymentProxyReporter.new(reporter), Astute::LogParser::NoParsing.new)
    end
    let(:nodes) { [{'uid' => '1', 'role' => 'compute'}] }
    let(:rpcclient) { mock_rpcclient(nodes) }
    let(:last_run_result) do
      {
        :statuscode =>0,
@@ -48,113 +48,94 @@ describe "Puppetd" do
        :sender=>"1"
      }
    end
    let(:last_run_result_running) do
      res = deep_copy(last_run_result)
      res[:data].merge!(:status => 'running', :running => 1, :stopped => 0)
      res
    end
    let(:last_run_result_fail) do
      res = deep_copy(last_run_result_running)
      res[:data].merge!(:runtime => 1358426000,
                        :time => {"last_run" => 1358426000},
                        :resources => {"failed" => 1}
                       )
      res
    end
    let(:last_run_failed) do
      res = deep_copy(last_run_result_fail)
      res[:data].merge!(:status => 'stopped', :stopped => 1, :running => 0)
      res
    end
    let(:last_run_result_finished) do
      res = deep_copy last_run_result
      res[:data][:time]['last_run'] = 1358428000
      res[:data][:status] = 'stopped'
      res
    end
    context 'reportet behavior' do
+      let(:last_run_result) do
+        {
+          :data=> {
+            :time=>{"last_run"=>1358425701},
+            :status => "running",
+            :resources => {'failed' => 0},
+            :running => 1,
+            :idling => 0
+          },
+          :sender=>"1"
+        }
+      end
      let(:prepare_mcollective_env) do
        last_run_result_new = deep_copy last_run_result
        last_run_result_new[:data][:time]['last_run'] = 1358426000
        rpcclient_new_res = mock_mc_result(last_run_result_new)
        rpcclient_finished_res = mock_mc_result(last_run_result_finished)
        rpcclient_valid_result = mock_mc_result(last_run_result)
        rpcclient.stubs(:last_run_summary).returns([rpcclient_valid_result]).then.
            returns([rpcclient_valid_result]).then.
+            returns([ mock_mc_result(last_run_result_running) ]).then.
+            returns([ mock_mc_result(last_run_result_running) ]).then.
            returns([rpcclient_new_res]).then.
            returns([rpcclient_finished_res])
        rpcclient
      end
      it "reports ready status for node if puppet deploy finished successfully" do
        prepare_mcollective_env
        reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}])
        rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)])
        Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0)
      end
-      it "doesn't report ready status for node if change_node_status disabled" do
-        prepare_mcollective_env
-        reporter.expects(:report).never
-        rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)])
-        Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0, change_node_status=false)
-      end
      context 'multiroles behavior' do
        let(:nodes) { [{'uid' => '1', 'role' => 'compute'}] }
        let(:nodes_multiroles) { [{'uid' => '1', 'role' => 'controller'}] }
        before(:each) do
          @ctx = Context.new("task id",
                             ProxyReporter::DeploymentProxyReporter.new(reporter, nodes + nodes_multiroles),
                             Astute::LogParser::NoParsing.new
                            )
        end
        it "it should not send final status before all roles of node will deploy" do
          prepare_mcollective_env
          reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'deploying', 'progress' => 50, 'role' => 'compute'}])
          rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)])
          Astute::PuppetdDeployer.deploy(@ctx, nodes, retries=0)
        end
      end
    end
    context "puppet state transitions" do
      let(:last_run_result_idle_pre) do
        res = deep_copy(last_run_result)
        res[:data].merge!(:status => 'idling', :idling => 1, :stopped => 0)
        res
      end
      let(:last_run_result_idle_post) do
        res = deep_copy(last_run_result_fail)
        res[:data].merge!(:status => 'idling', :idling => 1, :running => 0)
@@ -172,11 +153,11 @@ describe "Puppetd" do
            returns([ mock_mc_result(last_run_result_fail) ]).then.
            returns([ mock_mc_result(last_run_result_fail) ]).then.
            returns([ mock_mc_result(last_run_failed) ])
        reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
        rpcclient.expects(:runonce).once.
            returns([ mock_mc_result(last_run_result) ])
        Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
      end
@@ -188,11 +169,11 @@ describe "Puppetd" do
            returns([ mock_mc_result(last_run_result_running) ]).then.
            returns([ mock_mc_result(last_run_result_fail) ]).then.
            returns([ mock_mc_result(last_run_failed) ])
        reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
        rpcclient.expects(:runonce).once.
            returns([ mock_mc_result(last_run_result) ])
        Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
      end
@@ -202,26 +183,26 @@ describe "Puppetd" do
            returns([ mock_mc_result(last_run_result) ]).then.
            returns([ mock_mc_result(last_run_result_running) ]).then.
            returns([ mock_mc_result(last_run_failed) ])
        reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
        rpcclient.expects(:runonce).once.
            returns([ mock_mc_result(last_run_result) ])
        Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
      end
-      it "publishes error status for node if puppet failed (a cycle w/ one running state only)" do
+      it "publishes error status for node if puppet failed (a cycle with one running state only)" do
        rpcclient.stubs(:last_run_summary).times(5).
            returns([ mock_mc_result(last_run_result) ]).then.
            returns([ mock_mc_result(last_run_result) ]).then.
            returns([ mock_mc_result(last_run_result_running) ]).then.
            returns([ mock_mc_result(last_run_result_fail) ]).then.
            returns([ mock_mc_result(last_run_failed) ])
        reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
        rpcclient.expects(:runonce).once.
            returns([ mock_mc_result(last_run_result) ])
        Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
      end
@@ -230,30 +211,55 @@ describe "Puppetd" do
            returns([ mock_mc_result(last_run_result) ]).then.
            returns([ mock_mc_result(last_run_result) ]).then.
            returns([ mock_mc_result(last_run_failed) ])
        reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
        rpcclient.expects(:runonce).once.
            returns([ mock_mc_result(last_run_result) ])
        Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
      end
    end
-    it "doesn't publish error status for node if change_node_status disabled" do
-      reporter.expects(:report).never
-      rpcclient_valid_result = mock_mc_result(last_run_result)
-      rpcclient_new_res = mock_mc_result(last_run_result_fail)
-      rpcclient_finished_res = mock_mc_result(last_run_failed)
-      rpcclient.stubs(:last_run_summary).returns([rpcclient_valid_result]).then.
-          returns([rpcclient_valid_result]).then.
-          returns([rpcclient_new_res]).then.
-          returns([rpcclient_finished_res])
-      rpcclient.expects(:runonce).at_least_once.returns([rpcclient_valid_result])
-      MClient.any_instance.stubs(:rpcclient).returns(rpcclient)
-      Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0, change_node_status=false)
-    end
+    context '' do
+      around(:each) do |example|
+        old_value = Astute.config.PUPPET_FADE_INTERVAL
+        example.run
+        Astute.config.PUPPET_FADE_INTERVAL = old_value
+      end
+      before(:each) do
+        Astute.config.PUPPET_FADE_INTERVAL = 1
+      end
+      it "publishes error status for node if puppet running alien task (attempts been exhausted)" do
+        rpcclient.stubs(:last_run_summary).at_least(3).
+          returns([ mock_mc_result(last_run_result_running) ]).then.
+          returns([ mock_mc_result(last_run_result_running) ]).then.
+          returns([ mock_mc_result(last_run_result_running) ]).then.
+          returns([ mock_mc_result(last_run_result_running) ])
+        reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}])
+        rpcclient.expects(:runonce).never
+        Astute::PuppetdDeployer.deploy(ctx, nodes, 0)
+      end
+      it "ignore exit code of puppet running of alien task (waited for alien task stop and launched own)" do
+        rpcclient.stubs(:last_run_summary).at_least(3).
+          returns([ mock_mc_result(last_run_result_running) ]).then.
+          returns([ mock_mc_result(last_run_result_running) ]).then.
+          returns([ mock_mc_result(last_run_failed) ]).then.
+          returns([ mock_mc_result(last_run_failed) ]).then.
+          returns([ mock_mc_result(last_run_result_running) ]).then.
+          returns([ mock_mc_result(last_run_result_running) ]).then.
+          returns([ mock_mc_result(last_run_result_finished) ])
+        rpcclient.expects(:runonce).at_least(1).returns([ mock_mc_result(last_run_result) ])
+        reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}])
+        Astute::PuppetdDeployer.deploy(ctx, nodes, 1)
+      end
+    end
    it "retries to run puppet if it fails" do
@@ -268,7 +274,7 @@ describe "Puppetd" do
          returns([rpcclient_failed]).then.
          returns([rpcclient_fail]).then.
          returns([rpcclient_succeed])
      reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}])
      rpcclient.expects(:runonce).at_least_once.returns([rpcclient_valid_result])