From f6551811846c9e21d9bee04562e4eef4347b22c0 Mon Sep 17 00:00:00 2001 From: Vladimir Date: Mon, 16 Dec 2013 16:25:45 +0400 Subject: [PATCH] New behavior for idling puppet state * Kill process instead of sending USR1 signal; * Increase time to examine running process from 60 to 120 sec; * Increase time between last_run requests from 5 to 10 sec. Change-Id: I3fce9ac162a0201f7dc7062dead3350f55e350fc Closes-Bug: #1261276 --- examples/example_astute_config.yaml | 4 ++-- lib/astute/config.rb | 4 ++-- lib/astute/puppetd.rb | 9 ++++----- mcagents/puppetd.rb | 10 ++++++---- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/examples/example_astute_config.yaml b/examples/example_astute_config.yaml index c93e3ddb..08870bc3 100644 --- a/examples/example_astute_config.yaml +++ b/examples/example_astute_config.yaml @@ -15,8 +15,8 @@ PUPPET_DEPLOY_INTERVAL: 2 # After Puppet agent has finished real work it spend some time to graceful exit. # PUPPET_FADE_TIMEOUT means how long (in seconds) Astute can take for Puppet # to exit after real work has finished. -PUPPET_FADE_TIMEOUT: 60 +PUPPET_FADE_TIMEOUT: 120 # PUPPET_FADE_INTERVAL is used in puppetd.rb file. # Retry every PUPPET_FADE_INTERVAL seconds to check puppet state if it was # in 'running' state. 
-PUPPET_FADE_INTERVAL: 5 +PUPPET_FADE_INTERVAL: 10 diff --git a/lib/astute/config.rb b/lib/astute/config.rb index 802e49f7..791f9fb6 100644 --- a/lib/astute/config.rb +++ b/lib/astute/config.rb @@ -55,10 +55,10 @@ module Astute conf = {} conf[:PUPPET_TIMEOUT] = 60 * 60 # maximum time it waits for the whole deployment conf[:PUPPET_DEPLOY_INTERVAL] = 2 # sleep for ## sec, then check puppet status again - conf[:PUPPET_FADE_TIMEOUT] = 60 # how long it can take for puppet to exit after dumping to last_run_summary + conf[:PUPPET_FADE_TIMEOUT] = 120 # how long it can take for puppet to exit after dumping to last_run_summary conf[:MC_RETRIES] = 5 # MClient tries to call mcagent before failure conf[:MC_RETRY_INTERVAL] = 1 # MClient sleeps for ## sec between retries - conf[:PUPPET_FADE_INTERVAL] = 5 # retry every ## seconds to check puppet state if it was running + conf[:PUPPET_FADE_INTERVAL] = 10 # retry every ## seconds to check puppet state if it was running conf[:PROVISIONING_TIMEOUT] = 90 * 60 # timeout for booting target OS in provision conf[:REBOOT_TIMEOUT] = 120 # how long it can take for node to reboot diff --git a/lib/astute/puppetd.rb b/lib/astute/puppetd.rb index 4648f90b..589bc731 100644 --- a/lib/astute/puppetd.rb +++ b/lib/astute/puppetd.rb @@ -91,13 +91,12 @@ module Astute # but we should to turn it on only in error_nodes succeed_nodes -= hung_nodes error_nodes = (error_nodes + hung_nodes).uniq - running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes - hung_nodes - + running_nodes -= hung_nodes nodes_to_check = running_nodes + succeed_nodes + error_nodes - unless nodes_to_check.size == last_run.size - raise "Should never happen. Internal error in nodes statuses calculation. Statuses calculated for: #{nodes_to_check.inspect}," - "nodes passed to check statuses of: #{last_run.map {|n| n.results[:sender]}}" + all_nodes = last_run.map { |n| n.results[:sender] } + if nodes_to_check.size != all_nodes.size + raise "Internal error. 
Check: #{nodes_to_check.inspect}, passed #{all_nodes.inspect}" end {'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes} end diff --git a/mcagents/puppetd.rb b/mcagents/puppetd.rb index 810f5664..66100e0b 100644 --- a/mcagents/puppetd.rb +++ b/mcagents/puppetd.rb @@ -148,10 +148,12 @@ module MCollective when 'idling' then # signal daemon pid = puppet_agent_pid begin - ::Process.kill('USR1', pid) - reply[:output] = "Signalled daemonized puppet to run (process #{pid}); " + (reply[:output] || '') - rescue => ex - reply.fail "Failed to signal the puppet daemon (process #{pid}): #{ex}" + ::Process.kill('INT', pid) + rescue Errno::ESRCH => e + reply[:err_msg] = "Failed to signal the puppet apply daemon (process #{pid}): #{e}" + ensure + runonce_background + reply[:output] = "Kill old idling puppet process #{pid})." + (reply[:output] || '') end when 'stopped' then # just run