From 1b8d6e18a3c75b4aaf61fe5709e4f83be36c8d6f Mon Sep 17 00:00:00 2001
From: Vladimir
Date: Tue, 10 Dec 2013 14:03:24 +0400
Subject: [PATCH] Stronger puppet hang check

* add a second round to the puppetd_runonce check;
* log the hang message at warn level;
* mark hung nodes as error (previously reported as running);
* remove unused 'change_node_status' param;
* refactoring;
* add tests.

Change-Id: Iba7ac5d30a1e57842a44c95c3c68f19dd71bccff
Closes-Bug: #1256244
---
 examples/example_astute_config.yaml       |   2 +-
 lib/astute/config.rb                      |   2 +-
 lib/astute/deployment_engine/nailyfact.rb |   4 +-
 lib/astute/puppetd.rb                     | 289 ++++++++++++----
 spec/spec_helper.rb                       |   1 +
 spec/unit/nailyfact_deploy_spec.rb        |  12 +-
 spec/unit/puppetd_spec.rb                 | 146 +++++------
 7 files changed, 245 insertions(+), 211 deletions(-)

diff --git a/examples/example_astute_config.yaml b/examples/example_astute_config.yaml
index fdf30e67..c93e3ddb 100644
--- a/examples/example_astute_config.yaml
+++ b/examples/example_astute_config.yaml
@@ -19,4 +19,4 @@ PUPPET_FADE_TIMEOUT: 60
 # PUPPET_FADE_INTERVAL is used in puppetd.rb file.
 # Retry every PUPPET_FADE_INTERVAL seconds to check puppet state if it was
 # in 'running' state.
-PUPPET_FADE_INTERVAL: 1
+PUPPET_FADE_INTERVAL: 5
diff --git a/lib/astute/config.rb b/lib/astute/config.rb
index 004645b2..9b67c29a 100644
--- a/lib/astute/config.rb
+++ b/lib/astute/config.rb
@@ -58,7 +58,7 @@ module Astute
     conf[:PUPPET_FADE_TIMEOUT] = 60 # how long it can take for puppet to exit after dumping to last_run_summary
     conf[:MC_RETRIES] = 5 # MClient tries to call mcagent before failure
     conf[:MC_RETRY_INTERVAL] = 1 # MClient sleeps for ## sec between retries
-    conf[:PUPPET_FADE_INTERVAL] = 1 # retry every ## seconds to check puppet state if it was running
+    conf[:PUPPET_FADE_INTERVAL] = 5 # retry every ## seconds to check puppet state if it was running
     conf[:PROVISIONING_TIMEOUT] = 90 * 60 # timeout for booting target OS in provision
     conf[:REBOOT_TIMEOUT] = 120 # how long it can take for node to reboot
diff --git a/lib/astute/deployment_engine/nailyfact.rb b/lib/astute/deployment_engine/nailyfact.rb
index b21f6666..b2c3ebe7 100644
--- a/lib/astute/deployment_engine/nailyfact.rb
+++ b/lib/astute/deployment_engine/nailyfact.rb
@@ -16,7 +16,7 @@ require 'yaml'
 
 class Astute::DeploymentEngine::NailyFact < Astute::DeploymentEngine
 
-  def deploy_piece(nodes, retries=2, change_node_status=true)
+  def deploy_piece(nodes, retries=2)
     return false unless validate_nodes(nodes)
 
     @ctx.reporter.report(nodes_status(nodes, 'deploying', {'progress' => 0}))
@@ -30,7 +30,7 @@ class Astute::DeploymentEngine::NailyFact < Astute::DeploymentEngine
     nodes.each { |node| upload_facts(node) }
 
     Astute.logger.info "#{@ctx.task_id}: Required attrs/metadata passed via facts extension. Starting deployment."
-        Astute::PuppetdDeployer.deploy(@ctx, nodes, retries, change_node_status)
+        Astute::PuppetdDeployer.deploy(@ctx, nodes, retries)
         nodes_roles = nodes.map { |n| {n['uid'] => n['role']} }
         Astute.logger.info "#{@ctx.task_id}: Finished deployment of nodes => roles: #{nodes_roles.inspect}"
       end
diff --git a/lib/astute/puppetd.rb b/lib/astute/puppetd.rb
index b9d8849e..4648f90b 100644
--- a/lib/astute/puppetd.rb
+++ b/lib/astute/puppetd.rb
@@ -18,6 +18,147 @@ require 'timeout'
 
 module Astute
   module PuppetdDeployer
+
+    def self.deploy(ctx, nodes, retries=2)
+      @ctx = ctx
+      @nodes_roles = nodes.inject({}) { |h, n| h.merge({n['uid'] => n['role']}) }
+      @node_retries = nodes.inject({}) { |h, n| h.merge({n['uid'] => retries}) }
+      @nodes = nodes
+
+      Astute.logger.debug "Waiting for puppet to finish deployment on all "\
+        "nodes (timeout = #{Astute.config.PUPPET_TIMEOUT} sec)..."
+      time_before = Time.now
+
+      deploy_nodes(@nodes.map { |n| n['uid'] })
+
+      time_spent = Time.now - time_before
+      Astute.logger.info "#{@ctx.task_id}: Spent #{time_spent} seconds on puppet run "\
+        "for following nodes (uids): #{@nodes.map {|n| n['uid']}.join(',')}"
+    end
+
+    private
+
+    # Runs puppetd.runonce only if puppet is stopped on the host at the time.
+    # If it isn't stopped, we wait a bit and try again.
+    # Returns the list of node uids on which puppet appears to be hung.
+    def self.puppetd_runonce(uids)
+      started = Time.now.to_i
+      while Time.now.to_i - started < Astute.config.PUPPET_FADE_TIMEOUT
+        running_uids = puppetd(uids).last_run_summary.select { |x|
+          ['running', 'idling'].include?(x.results[:data][:status])
+        }.map { |n| n.results[:sender] }
+        stopped_uids = uids - running_uids
+
+        @nodes.select { |n| stopped_uids.include? n['uid'] }
+              .group_by { |n| n['debug'] }
+              .each do |debug, stop_nodes|
+          puppetd(stop_nodes.map { |n| n['uid'] }).runonce(:puppet_debug => debug)
+        end
+        break if running_uids.empty?
+
+        uids = running_uids
+        sleep Astute.config.PUPPET_FADE_INTERVAL
+      end
+      Astute.logger.debug "puppetd_runonce completed within #{Time.now.to_i - started} seconds."
+      Astute.logger.warn "Following nodes have puppet hung: '#{running_uids.join(',')}'" if running_uids.present?
+      running_uids
+    end
+
+    def self.calc_nodes_status(last_run, prev_run, hung_nodes=[])
+      # Finished are those which are not in running state,
+      # and changed their last_run time, which is changed after application of catalog,
+      # at the time of updating last_run_summary file. At that particular time puppet is
+      # still running, and will finish in a couple of seconds.
+      # If Puppet had crashed before it got a catalog (e.g. certificate problems),
+      # it didn't update last_run_summary file and switched to 'stopped' state.
+
+      stopped = last_run.select { |x| ['stopped', 'disabled'].include? x.results[:data][:status] }
+
+      # Select all finished nodes which have not failed and have changed their last_run time.
+      succeed_nodes = stopped.select { |n|
+        prev_n = prev_run.find{|ps| ps.results[:sender] == n.results[:sender] }
+
+        n.results[:data][:status] == 'stopped' &&
+        n.results[:data][:resources]['failed'].to_i == 0 &&
+        n.results[:data][:resources]['failed_to_restart'].to_i == 0 &&
+        n.results[:data][:time]['last_run'] != (prev_n && prev_n.results[:data][:time]['last_run'])
+      }.map{|x| x.results[:sender] }
+
+      stopped_nodes = stopped.map { |x| x.results[:sender] }
+      error_nodes = stopped_nodes - succeed_nodes
+      running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes
+
+      # Hung nodes may have changed state by this moment (success, error or still running),
+      # but we should count them only as error nodes.
+      succeed_nodes -= hung_nodes
+      error_nodes = (error_nodes + hung_nodes).uniq
+      running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes - hung_nodes
+
+      nodes_to_check = running_nodes + succeed_nodes + error_nodes
+      unless nodes_to_check.size == last_run.size
+        raise "Should never happen. Internal error in nodes statuses calculation. Statuses calculated for: #{nodes_to_check.inspect}, "\
+              "nodes passed to check statuses of: #{last_run.map {|n| n.results[:sender]}}"
+      end
+      {'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes}
+    end
+
+    def self.puppetd(uids)
+      puppetd = MClient.new(@ctx, "puppetd", Array(uids))
+      puppetd.on_respond_timeout do |uids|
+        nodes = uids.map do |uid|
+          { 'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => @nodes_roles[uid] }
+        end
+        @ctx.report_and_update_status('nodes' => nodes)
+      end
+      puppetd
+    end
+
+    def self.processing_error_nodes(error_nodes)
+      nodes_to_report = []
+      nodes_to_retry = []
+
+      error_nodes.each do |uid|
+        if @node_retries[uid] > 0
+          @node_retries[uid] -= 1
+          Astute.logger.debug "Puppet on node #{uid.inspect} will be restarted. "\
+                              "#{@node_retries[uid]} retries remained."
+          nodes_to_retry << uid
+        else
+          Astute.logger.debug "Node #{uid.inspect} has failed to deploy. There are no more retries for puppet run."
+          nodes_to_report << {'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => @nodes_roles[uid] }
+        end
+      end
+
+      return nodes_to_report, nodes_to_retry
+    end
+
+    def self.processing_running_nodes(running_nodes)
+      nodes_to_report = []
+      if running_nodes.present?
+        begin
+          # Pass nodes because logs calculation needs IP address of node, not just uid
+          nodes_progress = @ctx.deploy_log_parser.progress_calculate(running_nodes, @nodes)
+          if nodes_progress.present?
+            Astute.logger.debug "Got progress for nodes: #{nodes_progress.inspect}"
+            # Nodes with progress are running, so they are not included in nodes_to_report yet
+            nodes_progress.map! { |x| x.merge!('status' => 'deploying', 'role' => @nodes_roles[x['uid']]) }
+            nodes_to_report = nodes_progress
+          end
+        rescue => e
+          Astute.logger.warn "Some error occurred when parsing logs for nodes progress: #{e.message}, "\
+                             "trace: #{e.format_backtrace}"
+        end
+      end
+      nodes_to_report
+    end
+
+    def self.processing_succeed_nodes(succeed_nodes)
+      succeed_nodes.map do |uid|
+        { 'uid' => uid, 'status' => 'ready', 'role' => @nodes_roles[uid] }
+      end
+    end
+
     # As I (Andrey Danin) understand, Puppet agent goes through these steps:
     #   * Puppetd has 'stopped' state.
     #   * We run it as a run_once, and puppetd goes to 'idling' state - it trying to
     #       retrieve catalog.
     #   * If it can't retrieve catalog, it goes back to 'stopped' state without
     #       any update of last_run_summary file.
     #   * If puppetd retrieved catalog, it goes to 'running' state and starts all
     #       catalog tasks.
     #   * After puppetd finished all internal jobs connected with finished catalog,
     #       it goes to 'idling' state.
     #   * After a short time it goes to 'stopped' state because we ran it as a run_once.
-
-    private
-    # Runs puppetd.runonce only if puppet is stopped on the host at the time
-    # If it isn't stopped, we wait a bit and try again.
-    # Returns list of nodes uids which appear to be with hung puppet.
-    def self.puppetd_runonce(puppetd, uids, nodes)
-      debug_mode_dict = nodes.inject({}) {|dict, node| dict[node['uid']] = node['debug']; dict}
-      started = Time.now.to_i
-      while Time.now.to_i - started < Astute.config.PUPPET_FADE_TIMEOUT
-        puppetd.discover(:nodes => uids)
-        last_run = puppetd.last_run_summary
-        running_uids = last_run.select {|x| x.results[:data][:status] != 'stopped'}.map {|n| n.results[:sender]}
-        stopped_uids = uids - running_uids
-        # If stopped_uids is empty this cycle will not be called.
-        stopped_uids.each do |uid|
-          puppetd.discover(:nodes => [uid])
-          puppetd.runonce(:puppet_debug => debug_mode_dict[uid])
-        end
-        uids = running_uids
-        break if uids.empty?
-        sleep Astute.config.PUPPET_FADE_INTERVAL
-      end
-      Astute.logger.debug "puppetd_runonce completed within #{Time.now.to_i - started} seconds."
-      Astute.logger.debug "Following nodes have puppet hung: '#{running_uids.join(',')}'" if running_uids.any?
-      running_uids
-    end
-
-    def self.calc_nodes_status(last_run, prev_run)
-      # Finished are those which are not in running state,
-      # and changed their last_run time, which is changed after application of catalog,
-      # at the time of updating last_run_summary file. At that particular time puppet is
-      # still running, and will finish in a couple of seconds.
-      # If Puppet had crashed before it got a catalog (e.g. certificate problems),
-      # it didn't update last_run_summary file and switched to 'stopped' state.
-
-      stopped = last_run.select {|x| x.results[:data][:status] == 'stopped'}
-
-      # Select all finished nodes which not failed and changed last_run time.
-      succeed_nodes = stopped.select { |n|
-        prev_n = prev_run.find{|ps| ps.results[:sender] == n.results[:sender] }
-        n.results[:data][:resources]['failed'].to_i == 0 &&
-        n.results[:data][:resources]['failed_to_restart'].to_i == 0 &&
-        n.results[:data][:time]['last_run'] != (prev_n && prev_n.results[:data][:time]['last_run'])
-      }.map{|x| x.results[:sender] }
-
-      stopped_nodes = stopped.map {|x| x.results[:sender]}
-      error_nodes = stopped_nodes - succeed_nodes
-
-      # Running are all which didn't appear in finished
-      running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes
-
-      nodes_to_check = running_nodes + succeed_nodes + error_nodes
-      unless nodes_to_check.size == last_run.size
-        raise "Shoud never happen. Internal error in nodes statuses calculation. Statuses calculated for: #{nodes_to_check.inspect},"
-              "nodes passed to check statuses of: #{last_run.map {|n| n.results[:sender]}}"
-      end
-      {'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes}
-    end
-
-    public
-    def self.deploy(ctx, nodes, retries=2, change_node_status=true)
-      # TODO: can we hide retries, ignore_failure into @ctx ?
-      uids = nodes.map { |n| n['uid'] }
-      nodes_roles = {}
-      nodes.each { |n| nodes_roles[n['uid']] = n['role'] }
-
-      # Keep info about retries for each node
-      node_retries = {}
-      uids.each {|x| node_retries.merge!({x => retries}) }
-
-      Astute.logger.debug "Waiting for puppet to finish deployment on all nodes (timeout = #{Astute.config.PUPPET_TIMEOUT} sec)..."
-      time_before = Time.now
+    def self.deploy_nodes(nodes_to_check)
       Timeout::timeout(Astute.config.PUPPET_TIMEOUT) do
-        puppetd = MClient.new(ctx, "puppetd", uids)
-        puppetd.on_respond_timeout do |uids|
-          nodes = uids.map do |uid|
-            { 'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => nodes_roles[uid] }
-          end
-          ctx.report_and_update_status('nodes' => nodes)
-        end if change_node_status
-        prev_summary = puppetd.last_run_summary
-        puppetd_runonce(puppetd, uids, nodes)
-        nodes_to_check = uids
-        last_run = puppetd.last_run_summary
-        while nodes_to_check.any?
-          calc_nodes = calc_nodes_status(last_run, prev_summary)
+        prev_summary = puppetd(nodes_to_check).last_run_summary
+        hung_nodes = puppetd_runonce(nodes_to_check)
+
+        while nodes_to_check.present?
+          last_run = puppetd(nodes_to_check).last_run_summary
+          calc_nodes = calc_nodes_status(last_run, prev_summary, hung_nodes)
           Astute.logger.debug "Nodes statuses: #{calc_nodes.inspect}"
 
-          # At least we will report about successfully deployed nodes
-          nodes_to_report = []
-          if change_node_status
-            nodes_to_report.concat(calc_nodes['succeed'].map do |uid|
-              { 'uid' => uid, 'status' => 'ready', 'role' => nodes_roles[uid] }
-            end)
-          end
+          report_succeed = processing_succeed_nodes calc_nodes['succeed']
+          report_error, nodes_to_retry = processing_error_nodes(calc_nodes['error'])
+          report_running = processing_running_nodes(calc_nodes['running'])
 
-          # Process retries
-          nodes_to_retry = []
-          calc_nodes['error'].each do |uid|
-            if node_retries[uid] > 0
-              node_retries[uid] -= 1
-              Astute.logger.debug "Puppet on node #{uid.inspect} will be restarted. "\
-                                  "#{node_retries[uid]} retries remained."
-              nodes_to_retry << uid
-            else
-              Astute.logger.debug "Node #{uid.inspect} has failed to deploy. There is no more retries for puppet run."
-              nodes_to_report << {'uid' => uid, 'status' => 'error', 'error_type' => 'deploy', 'role' => nodes_roles[uid] } if change_node_status
-            end
-          end
-          if nodes_to_retry.any?
+          nodes_to_report = report_succeed + report_error + report_running
+          @ctx.report_and_update_status('nodes' => nodes_to_report) if nodes_to_report.present?
+
+          if nodes_to_retry.present?
             Astute.logger.info "Retrying to run puppet for following error nodes: #{nodes_to_retry.join(',')}"
-            puppetd_runonce(puppetd, nodes_to_retry, nodes)
+            hung_nodes = puppetd_runonce(nodes_to_retry)
             # We need this magic with prev_summary to reflect new puppetd run statuses..
             prev_summary.delete_if { |x| nodes_to_retry.include?(x.results[:sender]) }
             prev_summary += last_run.select { |x| nodes_to_retry.include?(x.results[:sender]) }
           end
-          # /end of processing retries
-
-          if calc_nodes['running'].any?
-            begin
-              # Pass nodes because logs calculation needs IP address of node, not just uid
-              nodes_progress = ctx.deploy_log_parser.progress_calculate(calc_nodes['running'], nodes)
-              if nodes_progress.any?
-                Astute.logger.debug "Got progress for nodes: #{nodes_progress.inspect}"
-                # Nodes with progress are running, so they are not included in nodes_to_report yet
-                nodes_progress.map! { |x| x.merge!('status' => 'deploying', 'role' => nodes_roles[x['uid']]) }
-                nodes_to_report += nodes_progress
-              end
-            rescue => e
-              Astute.logger.warn "Some error occurred when parse logs for nodes progress: #{e.message}, "\
-                                 "trace: #{e.format_backtrace}"
-            end
-          end
-
-          ctx.report_and_update_status('nodes' => nodes_to_report) if nodes_to_report.any?
           # we will iterate only over running nodes and those that we restart deployment for
           nodes_to_check = calc_nodes['running'] + nodes_to_retry
-        break if nodes_to_check.empty?
+          break if nodes_to_check.empty?
 
           sleep Astute.config.PUPPET_DEPLOY_INTERVAL
-        puppetd.discover(:nodes => nodes_to_check)
-        last_run = puppetd.last_run_summary
         end
       end
-      time_spent = Time.now - time_before
-      Astute.logger.info "#{ctx.task_id}: Spent #{time_spent} seconds on puppet run "\
-                         "for following nodes(uids): #{nodes.map {|n| n['uid']}.join(',')}"
     end
+
   end
 end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 745baba3..f975f512 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -33,6 +33,7 @@ Dir[File.join(File.dirname(__FILE__), 'unit/fixtures/*.rb')].each { |file| requi
 # resetting time to sleep significantly increases tests speed
 Astute.config.PUPPET_DEPLOY_INTERVAL = 0
 Astute.config.PUPPET_FADE_INTERVAL = 0
+Astute.config.PUPPET_FADE_TIMEOUT = 1
 Astute.config.MC_RETRY_INTERVAL = 0
 Astute.config.PROVISIONING_TIMEOUT = 0
 Astute.config.REBOOT_TIMEOUT = 0
diff --git a/spec/unit/nailyfact_deploy_spec.rb b/spec/unit/nailyfact_deploy_spec.rb
index 42abf179..fa05baed 100644
--- a/spec/unit/nailyfact_deploy_spec.rb
+++ b/spec/unit/nailyfact_deploy_spec.rb
@@ -60,7 +60,7 @@ describe "NailyFact DeploymentEngine" do
     it "it should not raise an exception if deployment mode is unknown" do
       deploy_engine.expects(:upload_facts).times(deploy_data.size)
-      Astute::PuppetdDeployer.stubs(:deploy).with(ctx, deploy_data, instance_of(Fixnum), true).once
+      Astute::PuppetdDeployer.stubs(:deploy).with(ctx, deploy_data, instance_of(Fixnum)).once
       expect {deploy_engine.deploy(deploy_data)}.to_not raise_exception
     end
   end
@@ -74,8 +74,8 @@ describe "NailyFact DeploymentEngine" do
       deploy_engine.expects(:upload_facts).times(deploy_data.size)
       # we got two calls, one for controller (high priority), and another for all computes (same low priority)
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, controller_nodes, instance_of(Fixnum), true).once
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum), true).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, controller_nodes, instance_of(Fixnum)).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum)).once
       expect {deploy_engine.deploy(deploy_data)}.to_not raise_exception
     end
@@ -122,12 +122,12 @@ describe "NailyFact DeploymentEngine" do
       deploy_engine.expects(:upload_facts).at_least_once
 
       primary_controller = deploy_data.find { |n| n['role'] == 'primary-controller' }
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, [primary_controller], 2, true).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, [primary_controller], 2).once
 
       controller_nodes.each do |n|
-        Astute::PuppetdDeployer.expects(:deploy).with(ctx, [n], 2, true).once
+        Astute::PuppetdDeployer.expects(:deploy).with(ctx, [n], 2).once
       end
 
-      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum), true).once
+      Astute::PuppetdDeployer.expects(:deploy).with(ctx, compute_nodes, instance_of(Fixnum)).once
       deploy_engine.deploy(deploy_data)
     end
diff --git a/spec/unit/puppetd_spec.rb b/spec/unit/puppetd_spec.rb
index 1e37a8c4..700f0314 100644
--- a/spec/unit/puppetd_spec.rb
+++ b/spec/unit/puppetd_spec.rb
@@ -22,15 +22,15 @@ describe "Puppetd" do
   context "PuppetdDeployer" do
     let(:reporter) { mock('reporter') }
-
+
     let(:ctx) do
       Context.new("task id", ProxyReporter::DeploymentProxyReporter.new(reporter),
Astute::LogParser::NoParsing.new) end - + let(:nodes) { [{'uid' => '1', 'role' => 'compute'}] } - + let(:rpcclient) { mock_rpcclient(nodes) } - + let(:last_run_result) do { :statuscode =>0, @@ -48,113 +48,94 @@ describe "Puppetd" do :sender=>"1" } end - + let(:last_run_result_running) do res = deep_copy(last_run_result) res[:data].merge!(:status => 'running', :running => 1, :stopped => 0) res end - + let(:last_run_result_fail) do res = deep_copy(last_run_result_running) - res[:data].merge!(:runtime => 1358426000, + res[:data].merge!(:runtime => 1358426000, :time => {"last_run" => 1358426000}, :resources => {"failed" => 1} ) res end - + let(:last_run_failed) do res = deep_copy(last_run_result_fail) res[:data].merge!(:status => 'stopped', :stopped => 1, :running => 0) res end - + let(:last_run_result_finished) do res = deep_copy last_run_result res[:data][:time]['last_run'] = 1358428000 res[:data][:status] = 'stopped' res end - + context 'reportet behavior' do - let(:last_run_result) do - { - :data=> { - :time=>{"last_run"=>1358425701}, - :status => "running", - :resources => {'failed' => 0}, - :running => 1, - :idling => 0 - }, - :sender=>"1" - } - end - + let(:prepare_mcollective_env) do last_run_result_new = deep_copy last_run_result last_run_result_new[:data][:time]['last_run'] = 1358426000 - + rpcclient_new_res = mock_mc_result(last_run_result_new) rpcclient_finished_res = mock_mc_result(last_run_result_finished) rpcclient_valid_result = mock_mc_result(last_run_result) rpcclient.stubs(:last_run_summary).returns([rpcclient_valid_result]).then. returns([rpcclient_valid_result]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. returns([rpcclient_new_res]).then. returns([rpcclient_finished_res]) - + rpcclient end it "reports ready status for node if puppet deploy finished successfully" do prepare_mcollective_env - + reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}]) rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0) end - it "doesn't report ready status for node if change_node_status disabled" do - prepare_mcollective_env - - reporter.expects(:report).never - rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)]) - - Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0, change_node_status=false) - end - context 'multiroles behavior' do let(:nodes) { [{'uid' => '1', 'role' => 'compute'}] } let(:nodes_multiroles) { [{'uid' => '1', 'role' => 'controller'}] } before(:each) do - @ctx = Context.new("task id", - ProxyReporter::DeploymentProxyReporter.new(reporter, nodes + nodes_multiroles), + @ctx = Context.new("task id", + ProxyReporter::DeploymentProxyReporter.new(reporter, nodes + nodes_multiroles), Astute::LogParser::NoParsing.new ) end - + it "it should not send final status before all roles of node will deploy" do prepare_mcollective_env - - reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'deploying', 'progress' => 50, 'role' => 'compute'}]) + + reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'deploying', 'progress' => 50, 'role' => 'compute'}]) rpcclient.expects(:runonce).at_least_once.returns([mock_mc_result(last_run_result)]) - + Astute::PuppetdDeployer.deploy(@ctx, nodes, retries=0) end end - + end context "puppet state transitions" do - + let(:last_run_result_idle_pre) do res = 
deep_copy(last_run_result) res[:data].merge!(:status => 'idling', :idling => 1, :stopped => 0) res end - + let(:last_run_result_idle_post) do res = deep_copy(last_run_result_fail) res[:data].merge!(:status => 'idling', :idling => 1, :running => 0) @@ -172,11 +153,11 @@ describe "Puppetd" do returns([ mock_mc_result(last_run_result_fail) ]).then. returns([ mock_mc_result(last_run_result_fail) ]).then. returns([ mock_mc_result(last_run_failed) ]) - + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) rpcclient.expects(:runonce).once. returns([ mock_mc_result(last_run_result) ]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) end @@ -188,11 +169,11 @@ describe "Puppetd" do returns([ mock_mc_result(last_run_result_running) ]).then. returns([ mock_mc_result(last_run_result_fail) ]).then. returns([ mock_mc_result(last_run_failed) ]) - + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) rpcclient.expects(:runonce).once. returns([ mock_mc_result(last_run_result) ]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) end @@ -202,26 +183,26 @@ describe "Puppetd" do returns([ mock_mc_result(last_run_result) ]).then. returns([ mock_mc_result(last_run_result_running) ]).then. returns([ mock_mc_result(last_run_failed) ]) - + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) rpcclient.expects(:runonce).once. returns([ mock_mc_result(last_run_result) ]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) end - it "publishes error status for node if puppet failed (a cycle w/ one running state only)" do + it "publishes error status for node if puppet failed (a cycle with one running state only)" do rpcclient.stubs(:last_run_summary).times(5). returns([ mock_mc_result(last_run_result) ]).then. returns([ mock_mc_result(last_run_result) ]).then. returns([ mock_mc_result(last_run_result_running) ]).then. returns([ mock_mc_result(last_run_result_fail) ]).then. returns([ mock_mc_result(last_run_failed) ]) - + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) rpcclient.expects(:runonce).once. returns([ mock_mc_result(last_run_result) ]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) end @@ -230,30 +211,55 @@ describe "Puppetd" do returns([ mock_mc_result(last_run_result) ]).then. returns([ mock_mc_result(last_run_result) ]).then. returns([ mock_mc_result(last_run_failed) ]) - + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) rpcclient.expects(:runonce).once. returns([ mock_mc_result(last_run_result) ]) - + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) end - end - it "doesn't publish error status for node if change_node_status disabled" do - reporter.expects(:report).never + context '' do + around(:each) do |example| + old_value = Astute.config.PUPPET_FADE_INTERVAL + example.run + Astute.config.PUPPET_FADE_INTERVAL = old_value + end - rpcclient_valid_result = mock_mc_result(last_run_result) - rpcclient_new_res = mock_mc_result(last_run_result_fail) - rpcclient_finished_res = mock_mc_result(last_run_failed) + before(:each) do + Astute.config.PUPPET_FADE_INTERVAL = 1 + end - rpcclient.stubs(:last_run_summary).returns([rpcclient_valid_result]).then. - returns([rpcclient_valid_result]).then. - returns([rpcclient_new_res]).then. 
- returns([rpcclient_finished_res]) - rpcclient.expects(:runonce).at_least_once.returns([rpcclient_valid_result]) + it "publishes error status for node if puppet running alien task (attempts been exhausted)" do + rpcclient.stubs(:last_run_summary).at_least(3). + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]) + + reporter.expects(:report).with('nodes' => [{'status' => 'error', 'error_type' => 'deploy', 'uid' => '1', 'role' => 'compute'}]) + rpcclient.expects(:runonce).never + + Astute::PuppetdDeployer.deploy(ctx, nodes, 0) + end + + it "ignore exit code of puppet running of alien task (waited for alien task stop and launched own)" do + rpcclient.stubs(:last_run_summary).at_least(3). + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_failed) ]).then. + returns([ mock_mc_result(last_run_failed) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_running) ]).then. + returns([ mock_mc_result(last_run_result_finished) ]) + + rpcclient.expects(:runonce).at_least(1).returns([ mock_mc_result(last_run_result) ]) + reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}]) + + Astute::PuppetdDeployer.deploy(ctx, nodes, 1) + end + end - MClient.any_instance.stubs(:rpcclient).returns(rpcclient) - Astute::PuppetdDeployer.deploy(ctx, nodes, retries=0, change_node_status=false) end it "retries to run puppet if it fails" do @@ -268,7 +274,7 @@ describe "Puppetd" do returns([rpcclient_failed]).then. returns([rpcclient_fail]).then. returns([rpcclient_succeed]) - + reporter.expects(:report).with('nodes' => [{'uid' => '1', 'status' => 'ready', 'progress' => 100, 'role' => 'compute'}]) rpcclient.expects(:runonce).at_least_once.returns([rpcclient_valid_result])
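For readers who want the new hang-detection flow in isolation, below is a minimal, self-contained Ruby sketch of the loop this patch introduces in puppetd_runonce: treat agents in 'running' or 'idling' as busy, re-poll on an interval until a fade timeout expires, kick a run only on agents that are stopped, and return whatever is still busy so the caller can report those uids as errors. It is an illustration only, not part of the patch: fetch_status and start_puppet are hypothetical stand-ins for the MCollective last_run_summary/runonce calls, and the constants merely mirror PUPPET_FADE_TIMEOUT / PUPPET_FADE_INTERVAL by analogy.

    require 'set'

    FADE_TIMEOUT  = 3   # seconds to wait for busy agents to fade out (cf. PUPPET_FADE_TIMEOUT)
    FADE_INTERVAL = 1   # seconds between checks (cf. PUPPET_FADE_INTERVAL)

    # Returns uids whose agent never left 'running'/'idling' within FADE_TIMEOUT.
    def run_once_or_report_hung(uids, fetch_status, start_puppet)
      deadline = Time.now + FADE_TIMEOUT
      started  = Set.new
      busy     = uids

      loop do
        # An agent is busy if it is applying a catalog or still waiting for one.
        busy = uids.select { |uid| %w(running idling).include?(fetch_status.call(uid)) }

        # Kick puppet only on agents that are stopped and have not been started yet.
        (uids - busy - started.to_a).each do |uid|
          start_puppet.call(uid)
          started << uid
        end

        break if busy.empty? || Time.now >= deadline
        sleep FADE_INTERVAL
      end

      busy # whatever is still busy here is treated as hung and reported as an error
    end

    # Usage with a stubbed transport: node '2' never leaves 'running', so it is
    # returned as hung, while node '1' gets a single runonce call.
    statuses = { '1' => 'stopped', '2' => 'running' }
    hung = run_once_or_report_hung(
      %w(1 2),
      ->(uid) { statuses[uid] },
      ->(uid) { puts "runonce on node #{uid}" }
    )
    puts "hung nodes: #{hung.inspect}"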