diff --git a/lib/astute/config.rb b/lib/astute/config.rb index 431a0f18..2984c13b 100644 --- a/lib/astute/config.rb +++ b/lib/astute/config.rb @@ -62,6 +62,7 @@ module Astute conf[:puppet_start_interval] = 2 # interval between attemps to start puppet conf[:puppet_retries] = 2 # how many times astute will try to run puppet conf[:puppet_succeed_retries] = 0 # use this to rerun a puppet task again if it was successful (idempotency) + conf[:puppet_undefined_retries] = 3 # how many times astute will try to get actual status of node before fail conf[:puppet_module_path] = '/etc/puppet/modules' # where we should find basic modules for puppet conf[:puppet_noop_run] = false # enable Puppet noop run conf[:mc_retries] = 10 # MClient tries to call mcagent before failure diff --git a/lib/astute/mclients/puppet_mclient.rb b/lib/astute/mclients/puppet_mclient.rb index ca1975f0..d50b9309 100644 --- a/lib/astute/mclients/puppet_mclient.rb +++ b/lib/astute/mclients/puppet_mclient.rb @@ -56,14 +56,14 @@ module Astute # Create configured puppet mcollective agent # @return [Astute::MClient] - def puppetd + def puppetd(timeout=nil, retries=1) puppetd = MClient.new( @ctx, "puppetd", [@node_id], _check_result=true, - _timeout=nil, - _retries=1, + _timeout=timeout, + _retries=retries, _enable_result_logging=false ) puppetd.on_respond_timeout do |uids| @@ -77,7 +77,7 @@ module Astute # Run last_run_summary action using mcollective puppet agent # @return [Hash] return hash with status and resources def last_run_summary - @summary = puppetd.last_run_summary( + @summary = puppetd(_timeout=10, _retries=6).last_run_summary( :puppet_noop_run => @options['puppet_noop_run'], :raw_report => @options['raw_report'] ).first[:data] diff --git a/lib/astute/puppet_job.rb b/lib/astute/puppet_job.rb index 5ab94f24..1334ffea 100644 --- a/lib/astute/puppet_job.rb +++ b/lib/astute/puppet_job.rb @@ -39,8 +39,6 @@ module Astute 'stopped', 'disabled' ] - FAILED_STATUSES = UNDEFINED_STATUSES + STOPED_STATUSES - def initialize(task, puppet_mclient, options) @task = task @retries = options['retries'] @@ -48,6 +46,8 @@ module Astute @puppet_start_interval = options['puppet_start_interval'] @time_observer = TimeObserver.new(options['timeout']) @succeed_retries = options['succeed_retries'] + @undefined_retries = options['undefined_retries'] + @original_undefined_retries = options['undefined_retries'] @puppet_mclient = puppet_mclient end @@ -76,6 +76,8 @@ module Astute processing_running_task when 'failed' processing_error_task + when 'undefined' + processing_undefined_task end time_is_up! if should_stop? @@ -169,8 +171,10 @@ module Astute 'successful' when BUSY_STATUSES.include?(mco_puppet_status) 'running' - when FAILED_STATUSES.include?(mco_puppet_status) + when STOPED_STATUSES.include?(mco_puppet_status) 'failed' + when UNDEFINED_STATUSES.include?(mco_puppet_status) + 'undefined' else raise StatusValidationError, "Unknow puppet status: #{mco_puppet_status}" @@ -189,7 +193,7 @@ module Astute # @return [void] def log_current_status(status) message = "#{task_details_for_log}, status: #{status}" - if FAILED_STATUSES.include?(status) + if (UNDEFINED_STATUSES + STOPED_STATUSES).include?(status) Astute.logger.error message else Astute.logger.debug message @@ -197,8 +201,9 @@ module Astute end # Process additional action in case of puppet succeed - # @return [String] Task status: successful, failed or running + # @return [String] Task status: successful or running def processing_succeed_task + reset_undefined_retries! Astute.logger.debug "Puppet completed within "\ "#{@time_observer.since_start} seconds" if @succeed_retries > 0 @@ -218,8 +223,9 @@ module Astute end # Process additional action in case of puppet failed - # @return [String] Task status: successful, failed or running + # @return [String] Task status: failed or running def processing_error_task + reset_undefined_retries! if @retries > 0 @retries -= 1 Astute.logger.debug "Puppet on node will be "\ @@ -236,12 +242,39 @@ module Astute end end + # Process additional action in case of undefined puppet status + # @return [String] Task status: failed or running + def processing_undefined_task + if @undefined_retries > 0 + @undefined_retries -= 1 + Astute.logger.debug "Puppet on node has undefined status. "\ + "#{@undefined_retries} retries remained. "\ + "#{task_details_for_log}" + Astute.logger.info "Retrying to check status for following "\ + "nodes: #{@puppet_mclient.node_id}" + 'running' + else + Astute.logger.error "Node has failed to get status. There is"\ + " no more retries for status check. #{task_details_for_log}" + 'failed' + end + end + # Process additional action in case of puppet running # @return [String]: Task status: successful, failed or running def processing_running_task + reset_undefined_retries! 'running' end + # Reset undefined retries to original value + # @return [void] + def reset_undefined_retries! + Astute.logger.debug "Reset undefined retries to original "\ + "value: #{@original_undefined_retries}" + @undefined_retries = @original_undefined_retries + end + end #PuppetJob end diff --git a/lib/astute/tasks/puppet.rb b/lib/astute/tasks/puppet.rb index 5f9fe9bc..69fc7237 100644 --- a/lib/astute/tasks/puppet.rb +++ b/lib/astute/tasks/puppet.rb @@ -47,6 +47,7 @@ module Astute 'timeout' => Astute.config.puppet_timeout, 'puppet_debug' => false, 'succeed_retries' => Astute.config.puppet_succeed_retries, + 'undefined_retries' => Astute.config.puppet_undefined_retries, 'raw_report' => Astute.config.puppet_raw_report, 'puppet_noop_run' => Astute.config.puppet_noop_run, 'puppet_start_timeout' => Astute.config.puppet_start_timeout, @@ -67,6 +68,7 @@ module Astute { 'retries' => @task['parameters']['retries'], 'succeed_retries' => @task['parameters']['succeed_retries'], + 'undefined_retries' => @task['parameters']['undefined_retries'], 'timeout' => @task['parameters']['timeout'], 'puppet_start_timeout' => @task['parameters'][ 'puppet_start_timeout'], diff --git a/spec/unit/puppet_job_spec.rb b/spec/unit/puppet_job_spec.rb index cd18e2bd..ed6f8b08 100644 --- a/spec/unit/puppet_job_spec.rb +++ b/spec/unit/puppet_job_spec.rb @@ -31,6 +31,7 @@ describe Astute::PuppetJob do { 'retries' => 1, 'succeed_retries' => 0, + 'undefined_retries' => 1, 'timeout' => 1, 'puppet_start_timeout' => 1, 'puppet_start_interval' => 0 @@ -71,7 +72,7 @@ describe Astute::PuppetJob do describe '#task_status=' do it 'should raise error if status do not support' do expect {subject.send(:task_status=, 'unknow_status')}.to \ - raise_error(StatusValidationError, /unknow_status/) + raise_error(Astute::StatusValidationError, /unknow_status/) end end @@ -84,7 +85,7 @@ describe Astute::PuppetJob do puppet_mclient.expects(:status).returns('unknow_status') expect {subject.status}.to raise_error( - StatusValidationError, /unknow_status/ + Astute::StatusValidationError, /unknow_status/ ) end end @@ -124,7 +125,7 @@ describe Astute::PuppetJob do end it 'should return runing when magent failed but can retry' do - puppet_mclient.expects(:run).twice.returns(true) + puppet_mclient.expects(:run).once.returns(true) subject.run puppet_mclient.stubs(:status) @@ -149,13 +150,45 @@ describe Astute::PuppetJob do expect(subject.status).to eq('successful') end + it 'should successful if undefined/failed but retry succeed' do + puppet_mclient.stubs(:run).returns(true) + options['undefined_retries'] = 1 + options['retries'] = 1 + subject.run + + puppet_mclient.stubs(:status) + .then.returns('undefined') + .returns('stopped') + .then.returns('undefined') + .then.returns('succeed') + + 3.times { expect(subject.status).to eq('running') } + expect(subject.status). to eq('successful') + end + it 'should successful if failed but retry succeed' do puppet_mclient.stubs(:run).returns(true) + options['undefined_retries'] = 0 options['retries'] = 2 subject.run puppet_mclient.stubs(:status) .returns('stopped') + .then.returns('stopped') + .then.returns('succeed') + + 2.times { expect(subject.status).to eq('running') } + expect(subject.status). to eq('successful') + end + + it 'should successful if undefined but retry succeed' do + puppet_mclient.stubs(:run).returns(true) + options['undefined_retries'] = 2 + options['retries'] = 0 + subject.run + + puppet_mclient.stubs(:status) + .returns('undefined') .then.returns('undefined') .then.returns('succeed') @@ -188,7 +221,7 @@ describe Astute::PuppetJob do subject.run puppet_mclient.stubs(:status) - .returns('undefined') + .returns('stopped') .then.returns('stopped') expect(subject.status).to eq('running') @@ -210,13 +243,67 @@ describe Astute::PuppetJob do subject.run puppet_mclient.stubs(:status) - .returns('undefined') + .returns('stopped') .then.returns('stopped') expect(subject.status).to eq('running') 3.times { expect(subject.status). to eq('failed') } end end + + context 'undefined' do + it 'should return failed if undefined and no more retries' do + puppet_mclient.stubs(:run).returns(true) + subject.run + + puppet_mclient.stubs(:status) + .returns('undefined') + .then.returns('undefined') + + expect(subject.status).to eq('running') + expect(subject.status).to eq('failed') + end + + it 'should return failed if time is over and no result' do + puppet_mclient.stubs(:run).returns(true) + options['timeout'] = 0 + subject.run + + puppet_mclient.stubs(:status).returns('undefined') + expect(subject.status).to eq('failed') + end + + it 'should do nothing if final status set and retries end' do + puppet_mclient.stubs(:run).returns(true) + options['undefined'] = 0 + subject.run + + puppet_mclient.stubs(:status) + .returns('undefined') + .then.returns('undefined') + + expect(subject.status).to eq('running') + 3.times { expect(subject.status). to eq('failed') } + end + + it 'should reset retries if answer was received' do + puppet_mclient.stubs(:run).returns(true) + options['undefined_retries'] = 1 + options['retries'] = 0 + subject.run + + puppet_mclient.stubs(:status) + .then.returns('undefined') + .returns('running') + .then.returns('undefined') + .then.returns('succeed') + + 3.times { expect(subject.status).to eq('running') } + expect(subject.status). to eq('successful') + end + end + + end end