From 1b8d6e18a3c75b4aaf61fe5709e4f83be36c8d6f Mon Sep 17 00:00:00 2001
From: Vladimir
Date: Tue, 10 Dec 2013 14:03:24 +0400
Subject: [PATCH] Stronger puppet hang check

* add a second round to the puppetd_runonce check;
* log the hang message at warn level;
* mark hung nodes as error (previously reported as running);
* remove unused 'change_node_status' param;
* refactoring;
* add tests.

Change-Id: Iba7ac5d30a1e57842a44c95c3c68f19dd71bccff
Closes-Bug: #1256244
---
 examples/example_astute_config.yaml       |   2 +-
 lib/astute/config.rb                      |   2 +-
 lib/astute/deployment_engine/nailyfact.rb |   4 +-
 lib/astute/puppetd.rb                     | 289 ++++++++++++----
 spec/spec_helper.rb                       |   1 +
 spec/unit/nailyfact_deploy_spec.rb        |  12 +-
 spec/unit/puppetd_spec.rb                 | 146 +++++------
 7 files changed, 245 insertions(+), 211 deletions(-)

diff --git a/examples/example_astute_config.yaml b/examples/example_astute_config.yaml
index fdf30e67..c93e3ddb 100644
--- a/examples/example_astute_config.yaml
+++ b/examples/example_astute_config.yaml
@@ -19,4 +19,4 @@ PUPPET_FADE_TIMEOUT: 60
 # PUPPET_FADE_INTERVAL is used in puppetd.rb file.
 # Retry every PUPPET_FADE_INTERVAL seconds to check puppet state if it was
 # in 'running' state.
-PUPPET_FADE_INTERVAL: 1
+PUPPET_FADE_INTERVAL: 5
diff --git a/lib/astute/config.rb b/lib/astute/config.rb
index 004645b2..9b67c29a 100644
--- a/lib/astute/config.rb
+++ b/lib/astute/config.rb
@@ -58,7 +58,7 @@ module Astute
     conf[:PUPPET_FADE_TIMEOUT] = 60 # how long it can take for puppet to exit after dumping to last_run_summary
     conf[:MC_RETRIES] = 5 # MClient tries to call mcagent before failure
     conf[:MC_RETRY_INTERVAL] = 1 # MClient sleeps for ## sec between retries
-    conf[:PUPPET_FADE_INTERVAL] = 1 # retry every ## seconds to check puppet state if it was running
+    conf[:PUPPET_FADE_INTERVAL] = 5 # retry every ## seconds to check puppet state if it was running
     conf[:PROVISIONING_TIMEOUT] = 90 * 60 # timeout for booting target OS in provision
     conf[:REBOOT_TIMEOUT] = 120 # how long it can take for node to reboot
diff --git a/lib/astute/deployment_engine/nailyfact.rb b/lib/astute/deployment_engine/nailyfact.rb
index b21f6666..b2c3ebe7 100644
--- a/lib/astute/deployment_engine/nailyfact.rb
+++ b/lib/astute/deployment_engine/nailyfact.rb
@@ -16,7 +16,7 @@ require 'yaml'
 
 class Astute::DeploymentEngine::NailyFact < Astute::DeploymentEngine
 
-  def deploy_piece(nodes, retries=2, change_node_status=true)
+  def deploy_piece(nodes, retries=2)
     return false unless validate_nodes(nodes)
 
     @ctx.reporter.report(nodes_status(nodes, 'deploying', {'progress' => 0}))
@@ -30,7 +30,7 @@ class Astute::DeploymentEngine::NailyFact < Astute::DeploymentEngine
     nodes.each { |node| upload_facts(node) }
 
     Astute.logger.info "#{@ctx.task_id}: Required attrs/metadata passed via facts extension. Starting deployment."
-        Astute::PuppetdDeployer.deploy(@ctx, nodes, retries, change_node_status)
+        Astute::PuppetdDeployer.deploy(@ctx, nodes, retries)
         nodes_roles = nodes.map { |n| {n['uid'] => n['role']} }
         Astute.logger.info "#{@ctx.task_id}: Finished deployment of nodes => roles: #{nodes_roles.inspect}"
       end
diff --git a/lib/astute/puppetd.rb b/lib/astute/puppetd.rb
index b9d8849e..4648f90b 100644
--- a/lib/astute/puppetd.rb
+++ b/lib/astute/puppetd.rb
@@ -18,6 +18,147 @@ require 'timeout'
 
 module Astute
   module PuppetdDeployer
+
+    def self.deploy(ctx, nodes, retries=2)
+      @ctx = ctx
+      @nodes_roles = nodes.inject({}) { |h, n| h.merge({n['uid'] => n['role']}) }
+      @node_retries = nodes.inject({}) { |h, n| h.merge({n['uid'] => retries}) }
+      @nodes = nodes
+
+      Astute.logger.debug "Waiting for puppet to finish deployment on all "\
+        "nodes (timeout = #{Astute.config.PUPPET_TIMEOUT} sec)..."
+      time_before = Time.now
+
+      deploy_nodes(@nodes.map { |n| n['uid'] })
+
+      time_spent = Time.now - time_before
+      Astute.logger.info "#{@ctx.task_id}: Spent #{time_spent} seconds on puppet run "\
+        "for following nodes (uids): #{@nodes.map {|n| n['uid']}.join(',')}"
+    end
+
+    private
+
+    # Runs puppetd.runonce only if puppet is stopped on the host at the time.
+    # If it isn't stopped, we wait a bit and try again.
+    # Returns the list of node uids on which puppet appears to be hung.
+    def self.puppetd_runonce(uids)
+      started = Time.now.to_i
+      while Time.now.to_i - started < Astute.config.PUPPET_FADE_TIMEOUT
+        running_uids = puppetd(uids).last_run_summary.select { |x|
+          ['running', 'idling'].include?(x.results[:data][:status])
+        }.map { |n| n.results[:sender] }
+        stopped_uids = uids - running_uids
+
+        @nodes.select { |n| stopped_uids.include? n['uid'] }
+              .group_by { |n| n['debug'] }
+              .each do |debug, stop_nodes|
+          puppetd(stop_nodes.map { |n| n['uid'] }).runonce(:puppet_debug => debug)
+        end
+        break if running_uids.empty?
+
+        uids = running_uids
+        sleep Astute.config.PUPPET_FADE_INTERVAL
+      end
+      Astute.logger.debug "puppetd_runonce completed within #{Time.now.to_i - started} seconds."
+      Astute.logger.warn "Following nodes have puppet hung: '#{running_uids.join(',')}'" if running_uids.present?
+      running_uids
+    end
+
+    def self.calc_nodes_status(last_run, prev_run, hung_nodes=[])
+      # Finished are those which are not in running state,
+      # and changed their last_run time, which is changed after application of catalog,
+      # at the time of updating last_run_summary file. At that particular time puppet is
+      # still running, and will finish in a couple of seconds.
+      # If Puppet had crashed before it got a catalog (e.g. certificate problems),
+      # it didn't update last_run_summary file and switched to 'stopped' state.
+
+      stopped = last_run.select { |x| ['stopped', 'disabled'].include? x.results[:data][:status] }
+
+      # Select all finished nodes which have not failed and have changed their last_run time.
+      succeed_nodes = stopped.select { |n|
+        prev_n = prev_run.find{|ps| ps.results[:sender] == n.results[:sender] }
+
+        n.results[:data][:status] == 'stopped' &&
+        n.results[:data][:resources]['failed'].to_i == 0 &&
+        n.results[:data][:resources]['failed_to_restart'].to_i == 0 &&
+        n.results[:data][:time]['last_run'] != (prev_n && prev_n.results[:data][:time]['last_run'])
+      }.map{|x| x.results[:sender] }
+
+      stopped_nodes = stopped.map { |x| x.results[:sender] }
+      error_nodes = stopped_nodes - succeed_nodes
+      running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes
+
+      # Hung nodes may have changed state by this moment (success, error or still running),
+      # but we should count them only as error nodes.
+      succeed_nodes -= hung_nodes
+      error_nodes = (error_nodes + hung_nodes).uniq
+      running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes - hung_nodes
+
+      nodes_to_check = running_nodes + succeed_nodes + error_nodes
+      unless nodes_to_check.size == last_run.size
+        raise "Should never happen. Internal error in nodes statuses calculation. Statuses calculated for: #{nodes_to_check.inspect}, "\
+              "nodes passed to check statuses of: #{last_run.map {|n| n.results[:sender]}}"
+      end
+      {'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes}
+    end
+
+    def self.puppetd(uids)
+      puppetd = MClient.new(@ctx, "puppetd", Array(uids))
+      puppetd.on_respond_timeout do |uids|
+        nodes = uids.map do |uid|
+          { 'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => @nodes_roles[uid] }
+        end
+        @ctx.report_and_update_status('nodes' => nodes)
+      end
+      puppetd
+    end
+
+    def self.processing_error_nodes(error_nodes)
+      nodes_to_report = []
+      nodes_to_retry = []
+
+      error_nodes.each do |uid|
+        if @node_retries[uid] > 0
+          @node_retries[uid] -= 1
+          Astute.logger.debug "Puppet on node #{uid.inspect} will be restarted. "\
+                              "#{@node_retries[uid]} retries remained."
+          nodes_to_retry << uid
+        else
+          Astute.logger.debug "Node #{uid.inspect} has failed to deploy. There are no more retries for puppet run."
+          nodes_to_report << {'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => @nodes_roles[uid] }
+        end
+      end
+
+      return nodes_to_report, nodes_to_retry
+    end
+
+    def self.processing_running_nodes(running_nodes)
+      nodes_to_report = []
+      if running_nodes.present?
+        begin
+          # Pass nodes because logs calculation needs IP address of node, not just uid
+          nodes_progress = @ctx.deploy_log_parser.progress_calculate(running_nodes, @nodes)
+          if nodes_progress.present?
+            Astute.logger.debug "Got progress for nodes: #{nodes_progress.inspect}"
+            # Nodes with progress are running, so they are not included in nodes_to_report yet
+            nodes_progress.map! { |x| x.merge!('status' => 'deploying', 'role' => @nodes_roles[x['uid']]) }
+            nodes_to_report = nodes_progress
+          end
+        rescue => e
+          Astute.logger.warn "Some error occurred when parsing logs for nodes progress: #{e.message}, "\
+                             "trace: #{e.format_backtrace}"
+        end
+      end
+      nodes_to_report
+    end
+
+    def self.processing_succeed_nodes(succeed_nodes)
+      succeed_nodes.map do |uid|
+        { 'uid' => uid, 'status' => 'ready', 'role' => @nodes_roles[uid] }
+      end
+    end
+
     # As I (Andrey Danin) understand, Puppet agent goes through these steps:
     #   * Puppetd has 'stopped' state.
     #   * We run it as a run_once, and puppetd goes to 'idling' state - it trying to
     #       retrieve catalog.
     #   * If it can't retrieve catalog, it goes back to 'stopped' state without
     #       any update of last_run_summary file.
     #   * If puppetd retrieved catalog, it goes to 'running' state and starts all
     #       catalog tasks.
     #   * After puppetd finished all internal jobs connected with finished catalog,
     #       it goes to 'idling' state.
     #   * After a short time it goes to 'stopped' state because we ran it as a run_once.
-
-    private
-    # Runs puppetd.runonce only if puppet is stopped on the host at the time
-    # If it isn't stopped, we wait a bit and try again.
-    # Returns list of nodes uids which appear to be with hung puppet.
-    def self.puppetd_runonce(puppetd, uids, nodes)
-      debug_mode_dict = nodes.inject({}) {|dict, node| dict[node['uid']] = node['debug']; dict}
-      started = Time.now.to_i
-      while Time.now.to_i - started < Astute.config.PUPPET_FADE_TIMEOUT
-        puppetd.discover(:nodes => uids)
-        last_run = puppetd.last_run_summary
-        running_uids = last_run.select {|x| x.results[:data][:status] != 'stopped'}.map {|n| n.results[:sender]}
-        stopped_uids = uids - running_uids
-        # If stopped_uids is empty this cycle will not be called.
-        stopped_uids.each do |uid|
-          puppetd.discover(:nodes => [uid])
-          puppetd.runonce(:puppet_debug => debug_mode_dict[uid])
-        end
-        uids = running_uids
-        break if uids.empty?
-        sleep Astute.config.PUPPET_FADE_INTERVAL
-      end
-      Astute.logger.debug "puppetd_runonce completed within #{Time.now.to_i - started} seconds."
-      Astute.logger.debug "Following nodes have puppet hung: '#{running_uids.join(',')}'" if running_uids.any?
-      running_uids
-    end
-
-    def self.calc_nodes_status(last_run, prev_run)
-      # Finished are those which are not in running state,
-      # and changed their last_run time, which is changed after application of catalog,
-      # at the time of updating last_run_summary file. At that particular time puppet is
-      # still running, and will finish in a couple of seconds.
-      # If Puppet had crashed before it got a catalog (e.g. certificate problems),
-      # it didn't update last_run_summary file and switched to 'stopped' state.
-
-      stopped = last_run.select {|x| x.results[:data][:status] == 'stopped'}
-
-      # Select all finished nodes which not failed and changed last_run time.
-      succeed_nodes = stopped.select { |n|
-        prev_n = prev_run.find{|ps| ps.results[:sender] == n.results[:sender] }
-        n.results[:data][:resources]['failed'].to_i == 0 &&
-        n.results[:data][:resources]['failed_to_restart'].to_i == 0 &&
-        n.results[:data][:time]['last_run'] != (prev_n && prev_n.results[:data][:time]['last_run'])
-      }.map{|x| x.results[:sender] }
-
-      stopped_nodes = stopped.map {|x| x.results[:sender]}
-      error_nodes = stopped_nodes - succeed_nodes
-
-      # Running are all which didn't appear in finished
-      running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes
-
-      nodes_to_check = running_nodes + succeed_nodes + error_nodes
-      unless nodes_to_check.size == last_run.size
-        raise "Shoud never happen. Internal error in nodes statuses calculation. Statuses calculated for: #{nodes_to_check.inspect},"
-              "nodes passed to check statuses of: #{last_run.map {|n| n.results[:sender]}}"
-      end
-      {'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes}
-    end
-
-    public
-    def self.deploy(ctx, nodes, retries=2, change_node_status=true)
-      # TODO: can we hide retries, ignore_failure into @ctx ?
-      uids = nodes.map { |n| n['uid'] }
-      nodes_roles = {}
-      nodes.each { |n| nodes_roles[n['uid']] = n['role'] }
-
-      # Keep info about retries for each node
-      node_retries = {}
-      uids.each {|x| node_retries.merge!({x => retries}) }
-
-      Astute.logger.debug "Waiting for puppet to finish deployment on all nodes (timeout = #{Astute.config.PUPPET_TIMEOUT} sec)..."
-      time_before = Time.now
+    def self.deploy_nodes(nodes_to_check)
       Timeout::timeout(Astute.config.PUPPET_TIMEOUT) do
-        puppetd = MClient.new(ctx, "puppetd", uids)
-        puppetd.on_respond_timeout do |uids|
-          nodes = uids.map do |uid|
-            { 'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => nodes_roles[uid] }
-          end
-          ctx.report_and_update_status('nodes' => nodes)
-        end if change_node_status
-        prev_summary = puppetd.last_run_summary
-        puppetd_runonce(puppetd, uids, nodes)
-        nodes_to_check = uids
-        last_run = puppetd.last_run_summary
-        while nodes_to_check.any?
-          calc_nodes = calc_nodes_status(last_run, prev_summary)
+        prev_summary = puppetd(nodes_to_check).last_run_summary
+        hung_nodes = puppetd_runonce(nodes_to_check)
+
+        while nodes_to_check.present?
+          last_run = puppetd(nodes_to_check).last_run_summary
+          calc_nodes = calc_nodes_status(last_run, prev_summary, hung_nodes)
           Astute.logger.debug "Nodes statuses: #{calc_nodes.inspect}"
 
-          # At least we will report about successfully deployed nodes
-          nodes_to_report = []
-          if change_node_status
-            nodes_to_report.concat(calc_nodes['succeed'].map do |uid|
-              { 'uid' => uid, 'status' => 'ready', 'role' => nodes_roles[uid] }
-            end)
-          end
+          report_succeed = processing_succeed_nodes calc_nodes['succeed']
+          report_error, nodes_to_retry = processing_error_nodes(calc_nodes['error'])
+          report_running = processing_running_nodes(calc_nodes['running'])
 
-          # Process retries
-          nodes_to_retry = []
-          calc_nodes['error'].each do |uid|
-            if node_retries[uid] > 0
-              node_retries[uid] -= 1
-              Astute.logger.debug "Puppet on node #{uid.inspect} will be restarted. "\
-                                  "#{node_retries[uid]} retries remained."
-              nodes_to_retry << uid
-            else
-              Astute.logger.debug "Node #{uid.inspect} has failed to deploy. There is no more retries for puppet run."
-              nodes_to_report << {'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => nodes_roles[uid] } if change_node_status
-            end
-          end
-          if nodes_to_retry.any?
+          nodes_to_report = report_succeed + report_error + report_running
+          @ctx.report_and_update_status('nodes' => nodes_to_report) if nodes_to_report.present?
+
+          if nodes_to_retry.present?
             Astute.logger.info "Retrying to run puppet for following error nodes: #{nodes_to_retry.join(',')}"
-            puppetd_runonce(puppetd, nodes_to_retry, nodes)
+            hung_nodes = puppetd_runonce(nodes_to_retry)
             # We need this magic with prev_summary to reflect new puppetd run statuses..
             prev_summary.delete_if { |x| nodes_to_retry.include?(x.results[:sender]) }
             prev_summary += last_run.select { |x| nodes_to_retry.include?(x.results[:sender]) }
           end
-          # /end of processing retries
-
-          if calc_nodes['running'].any?
-            begin
-              # Pass nodes because logs calculation needs IP address of node, not just uid
-              nodes_progress = ctx.deploy_log_parser.progress_calculate(calc_nodes['running'], nodes)
-              if nodes_progress.any?
-                Astute.logger.debug "Got progress for nodes: #{nodes_progress.inspect}"
-                # Nodes with progress are running, so they are not included in nodes_to_report yet
-                nodes_progress.map! { |x| x.merge!('status' => 'deploying', 'role' => nodes_roles[x['uid']]) }
-                nodes_to_report += nodes_progress
-              end
-            rescue => e
-              Astute.logger.warn "Some error occurred when parse logs for nodes progress: #{e.message}, "\
-                                 "trace: #{e.format_backtrace}"
-            end
-          end
-
-          ctx.report_and_update_status('nodes' => nodes_to_report) if nodes_to_report.any?
           # we will iterate only over running nodes and those that we restart deployment for
           nodes_to_check = calc_nodes['running'] + nodes_to_retry
-        break if nodes_to_check.empty?
+          break if nodes_to_check.empty?
 
           sleep Astute.config.PUPPET_DEPLOY_INTERVAL
-        puppetd.discover(:nodes => nodes_to_check)
-        last_run = puppetd.last_run_summary
         end
       end
-      time_spent = Time.now - time_before
-      Astute.logger.info "#{ctx.task_id}: Spent #{time_spent} seconds on puppet run "\
-                         "for following nodes(uids): #{nodes.map {|n| n['uid']}.join(',')}"
     end
+
   end
 end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 745baba3..f975f512 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -33,6 +33,7 @@ Dir[File.join(File.dirname(__FILE__), 'unit/fixtures/*.rb')].each { |file| requi
 # resetting time to sleep significantly increases tests speed
 Astute.config.PUPPET_DEPLOY_INTERVAL = 0
 Astute.config.PUPPET_FADE_INTERVAL = 0
+Astute.config.PUPPET_FADE_TIMEOUT = 1
 Astute.config.MC_RETRY_INTERVAL = 0
 Astute.config.PROVISIONING_TIMEOUT = 0
 Astute.config.REBOOT_TIMEOUT = 0
diff --git a/spec/unit/nailyfact_deploy_spec.rb b/spec/unit/nailyfact_deploy_spec.rb
index 42abf179..fa05baed 100644
--- a/spec/unit/nailyfact_deploy_spec.rb
+++ b/spec/unit/nailyfact_deploy_spec.rb
@@ -60,7 +60,7 @@ describe "NailyFact DeploymentEngine" do
     it "it should not raise an exception if deployment mode is unknown" do
       deploy_engine.expects(:upload_facts).times(deploy_data.size)
-      Astute::PuppetdDeployer.stubs(:deploy).with(ctx, deploy_data, instance_of(Fixnum), true).once
+      Astute::PuppetdDeployer.stubs(:deploy).with(ctx, deploy_data, instance_of(Fixnum)).once
       expect {deploy_engine.deploy(deploy_data)}.to_not raise_exception
     end
   end
@@ -74,8 +74,8 @@ describe "NailyFact DeploymentEngine" do
       deploy_engine.expects(:upload_facts).times(deploy_data.size)
       # we got two calls, one for controller (high priority), and another for all computes (same low priority)
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, controller_nodes, instance_of(Fixnum), true).once
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum), true).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, controller_nodes, instance_of(Fixnum)).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum)).once
       expect {deploy_engine.deploy(deploy_data)}.to_not raise_exception
     end
@@ -122,12 +122,12 @@ describe "NailyFact DeploymentEngine" do
       deploy_engine.expects(:upload_facts).at_least_once
 
       primary_controller = deploy_data.find { |n| n['role'] == 'primary-controller' }
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, [primary_controller], 2, true).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, [primary_controller], 2).once
 
       controller_nodes.each do |n|
-        Astute::PuppetdDeployer.expects(:deploy).with(ctx, [n], 2, true).once
+        Astute::PuppetdDeployer.expects(:deploy).with(ctx, [n], 2).once
       end
 
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum), true).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum)).once
       deploy_engine.deploy(deploy_data)
     end
diff --git a/spec/unit/puppetd_spec.rb b/spec/unit/puppetd_spec.rb
index 1e37a8c4..700f0314 100644
--- a/spec/unit/puppetd_spec.rb
+++ b/spec/unit/puppetd_spec.rb
@@ -22,15 +22,15 @@ describe "Puppetd" do
   context "PuppetdDeployer" do
     let(:reporter) { mock('reporter') }
-
+
     let(:ctx) do
       Context.new("task id", ProxyReporter::DeploymentProxyReporter.new(reporter),
Astute::LogParser::NoParsing.new) end - + let(:nodes) { [{'uid' => '1', 'role' => 'compute'}] } - + let(:rpcclient) { mock_rpcclient(nodes) } - + let(:last_run_result) do { :statuscode =>0, @@ -48,113 +48,94 @@ describe "Puppetd" do :sender=>"1" } end - + let(:last_run_result_running) do res = deep_copy(last_run_result) res[:data].merge!(:status => 'running', :running => 1, :stopped => 0) res end - + let(:last_run_result_fail) do res = deep_copy(last_run_result_running) - res[:data].merge!(:runtime => 1358426000, + res[:data].merge!(:runtime => 1358426000, :time => {"last_run" => 1358426000}, :resources => {"failed" => 1} ) res end - + let(:last_run_failed) do res = deep_copy(last_run_result_fail) res[:data].merge!(:status => 'stopped', :stopped => 1, :running => 0) res end - + let(:last_run_result_finished) do res = deep_copy last_run_result res[:data][:time]['last_run'] = 1358428000 res[:data][:status] = 'stopped' res end - + context 'reportet behavior' do - let(:last_run_result) do - { - :data=> { - :time=>{"last_run"=>1358425701}, - :status => "running", - :resources => {'failed' => 0}, - :running => 1, - :idling => 0 - }, - :sender=>"1" - } - end - + let(:prepare_mcollective_env) do last_run_result_new = deep_copy last_run_result last_run_result_new[:data][:time]['last_run'] = 1358426000 - + rpcclient_new_res = mock_mc_result(last_run_result_new) rpcclient_finished_res = mock_mc_result(last_run_result_finished) rpcclient_valid_result = mock_mc_result(last_run_result) rpcclient.stubs(:last_run_summary).returns([rpcclient_valid_result]).then. returns([rpcclient_valid_result]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. returns([rpcclient_new_res]).then. returns([rpcclient_finished_res]) - + rpcclient end it "reports ready status for node if puppet deploy finished successfully" do prepare_mcollective_env - + reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}]) rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0) end - it "doesn't report ready status for node if change_node_status disabled" do - prepare_mcollective_env - - reporter.expects(:report).never - rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)]) - - Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0, change_node_status=false) - end - context 'multiroles behavior' do let(:nodes) { [{'uid' => '1', 'role' => 'compute'}] } let(:nodes_multiroles) { [{'uid' => '1', 'role' => 'controller'}] } before(:each) do - @ctx = Context.new("task id", - ProxyReporter::DeploymentProxyReporter.new(reporter, nodes + nodes_multiroles), + @ctx = Context.new("task id", + ProxyReporter::DeploymentProxyReporter.new(reporter, nodes + nodes_multiroles), Astute::LogParser::NoParsing.new ) end - + it "it should not send final status before all roles of node will deploy" do prepare_mcollective_env - - reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'deploying', 'progress' => 50, 'role' => 'compute'}]) + + reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'deploying', 'progress' => 50, 'role' => 'compute'}]) rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)]) - + Astute::PuppetdDeployer.deploy(@ctx, nodes, retries=0) end end - + end context "puppet state transitions" do - + let(:last_run_result_idle_pre) do res = 
deep_copy(last_run_result) res[:data].merge!(:status => 'idling', :idling => 1, :stopped => 0) res end - + let(:last_run_result_idle_post) do res = deep_copy(last_run_result_fail) res[:data].merge!(:status => 'idling', :idling => 1, :running => 0) @@ -172,11 +153,11 @@ describe "Puppetd" do returns([ mock_mc_result(last_run_result_fail) ]).then. returns([ mock_mc_result(last_run_result_fail) ]).then. returns([ mock_mc_result(last_run_failed) ]) - + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) rpcclient.expects(:runonce).once. returns([ mock_mc_result(last_run_result) ]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) end @@ -188,11 +169,11 @@ describe "Puppetd" do returns([ mock_mc_result(last_run_result_running) ]).then. returns([ mock_mc_result(last_run_result_fail) ]).then. returns([ mock_mc_result(last_run_failed) ]) - + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) rpcclient.expects(:runonce).once. returns([ mock_mc_result(last_run_result) ]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) end @@ -202,26 +183,26 @@ describe "Puppetd" do returns([ mock_mc_result(last_run_result) ]).then. returns([ mock_mc_result(last_run_result_running) ]).then. returns([ mock_mc_result(last_run_failed) ]) - + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) rpcclient.expects(:runonce).once. returns([ mock_mc_result(last_run_result) ]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) end - it "publishes error status for node if puppet failed (a cycle w/ one running state only)" do + it "publishes error status for node if puppet failed (a cycle with one running state only)" do rpcclient.stubs(:last_run_summary).times(5). returns([ mock_mc_result(last_run_result) ]).then. returns([ mock_mc_result(last_run_result) ]).then. returns([ mock_mc_result(last_run_result_running) ]).then. returns([ mock_mc_result(last_run_result_fail) ]).then. returns([ mock_mc_result(last_run_failed) ]) - + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) rpcclient.expects(:runonce).once. returns([ mock_mc_result(last_run_result) ]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) end @@ -230,30 +211,55 @@ describe "Puppetd" do returns([ mock_mc_result(last_run_result) ]).then. returns([ mock_mc_result(last_run_result) ]).then. returns([ mock_mc_result(last_run_failed) ]) - + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) rpcclient.expects(:runonce).once. returns([ mock_mc_result(last_run_result) ]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) end - end - it "doesn't publish error status for node if change_node_status disabled" do - reporter.expects(:report).never + context '' do + around(:each) do |example| + old_value = Astute.config.PUPPET_FADE_INTERVAL + example.run + Astute.config.PUPPET_FADE_INTERVAL = old_value + end - rpcclient_valid_result = mock_mc_result(last_run_result) - rpcclient_new_res = mock_mc_result(last_run_result_fail) - rpcclient_finished_res = mock_mc_result(last_run_failed) + before(:each) do + Astute.config.PUPPET_FADE_INTERVAL = 1 + end - rpcclient.stubs(:last_run_summary).returns([rpcclient_valid_result]).then. - returns([rpcclient_valid_result]).then. - returns([rpcclient_new_res]).then. 
- returns([rpcclient_finished_res]) - rpcclient.expects(:runonce).at_least_once.returns([rpcclient_valid_result]) + it "publishes error status for node if puppet running alien task (attempts been exhausted)" do + rpcclient.stubs(:last_run_summary).at_least(3). + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]) + + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) + rpcclient.expects(:runonce).never + + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) + end + + it "ignore exit code of puppet running of alien task (waited for alien task stop and launched own)" do + rpcclient.stubs(:last_run_summary).at_least(3). + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_failed) ]).then. + returns([ mock_mc_result(last_run_failed) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_finished) ]) + + rpcclient.expects(:runonce).at_least(1).returns([ mock_mc_result(last_run_result) ]) + reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}]) + + Astute::PuppetdDeployer.deploy(ctx, nodes, 1) + end + end - MClient.any_instance.stubs(:rpcclient).returns(rpcclient) - Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0, change_node_status=false) end it "retries to run puppet if it fails" do @@ -268,7 +274,7 @@ describe "Puppetd" do returns([rpcclient_failed]).then. returns([rpcclient_fail]).then. returns([rpcclient_succeed]) - + reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}]) rpcclient.expects(:runonce).at_least_once.returns([rpcclient_valid_result])
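For readers who want the new hang-detection flow in isolation, below is a minimal, self-contained Ruby sketch of the loop this patch introduces in puppetd_runonce: treat agents in 'running' or 'idling' as busy, re-poll on an interval until a fade timeout expires, kick a run only on agents that are stopped, and return whatever is still busy so the caller can report those uids as errors. It is an illustration only, not part of the patch: fetch_status and start_puppet are hypothetical stand-ins for the MCollective last_run_summary/runonce calls, and the constants merely mirror PUPPET_FADE_TIMEOUT / PUPPET_FADE_INTERVAL by analogy.

    require 'set'

    FADE_TIMEOUT  = 3   # seconds to wait for busy agents to fade out (cf. PUPPET_FADE_TIMEOUT)
    FADE_INTERVAL = 1   # seconds between checks (cf. PUPPET_FADE_INTERVAL)

    # Returns uids whose agent never left 'running'/'idling' within FADE_TIMEOUT.
    def run_once_or_report_hung(uids, fetch_status, start_puppet)
      deadline = Time.now + FADE_TIMEOUT
      started  = Set.new
      busy     = uids

      loop do
        # An agent is busy if it is applying a catalog or still waiting for one.
        busy = uids.select { |uid| %w(running idling).include?(fetch_status.call(uid)) }

        # Kick puppet only on agents that are stopped and have not been started yet.
        (uids - busy - started.to_a).each do |uid|
          start_puppet.call(uid)
          started << uid
        end

        break if busy.empty? || Time.now >= deadline
        sleep FADE_INTERVAL
      end

      busy # whatever is still busy here is treated as hung and reported as an error
    end

    # Usage with a stubbed transport: node '2' never leaves 'running', so it is
    # returned as hung, while node '1' gets a single runonce call.
    statuses = { '1' => 'stopped', '2' => 'running' }
    hung = run_once_or_report_hung(
      %w(1 2),
      ->(uid) { statuses[uid] },
      ->(uid) { puts "runonce on node #{uid}" }
    )
    puts "hung nodes: #{hung.inspect}"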