New behavior for idling puppet state
* Kill the process instead of sending the USR1 signal; * Increase the time to examine a running process from 60 to 120 sec; * Increase the time between last_run requests from 5 to 10 sec. Change-Id: I3fce9ac162a0201f7dc7062dead3350f55e350fc Closes-Bug: #1261276
This commit is contained in:
parent
75aa0877cb
commit
f655181184
|
@ -15,8 +15,8 @@ PUPPET_DEPLOY_INTERVAL: 2
|
||||||
# After the Puppet agent has finished its real work, it spends some time exiting gracefully.
|
# After the Puppet agent has finished its real work, it spends some time exiting gracefully.
|
||||||
# PUPPET_FADE_TIMEOUT means how long (in seconds) Astute can take for Puppet
|
# PUPPET_FADE_TIMEOUT means how long (in seconds) Astute can take for Puppet
|
||||||
# to exit after real work has finished.
|
# to exit after real work has finished.
|
||||||
PUPPET_FADE_TIMEOUT: 60
|
PUPPET_FADE_TIMEOUT: 120
|
||||||
# PUPPET_FADE_INTERVAL is used in puppetd.rb file.
|
# PUPPET_FADE_INTERVAL is used in puppetd.rb file.
|
||||||
# Retry every PUPPET_FADE_INTERVAL seconds to check puppet state if it was
|
# Retry every PUPPET_FADE_INTERVAL seconds to check puppet state if it was
|
||||||
# in 'running' state.
|
# in 'running' state.
|
||||||
PUPPET_FADE_INTERVAL: 5
|
PUPPET_FADE_INTERVAL: 10
|
||||||
|
|
|
@ -55,10 +55,10 @@ module Astute
|
||||||
conf = {}
|
conf = {}
|
||||||
conf[:PUPPET_TIMEOUT] = 60 * 60 # maximum time it waits for the whole deployment
|
conf[:PUPPET_TIMEOUT] = 60 * 60 # maximum time it waits for the whole deployment
|
||||||
conf[:PUPPET_DEPLOY_INTERVAL] = 2 # sleep for ## sec, then check puppet status again
|
conf[:PUPPET_DEPLOY_INTERVAL] = 2 # sleep for ## sec, then check puppet status again
|
||||||
conf[:PUPPET_FADE_TIMEOUT] = 60 # how long it can take for puppet to exit after dumping to last_run_summary
|
conf[:PUPPET_FADE_TIMEOUT] = 120 # how long it can take for puppet to exit after dumping to last_run_summary
|
||||||
conf[:MC_RETRIES] = 5 # MClient tries to call mcagent before failure
|
conf[:MC_RETRIES] = 5 # MClient tries to call mcagent before failure
|
||||||
conf[:MC_RETRY_INTERVAL] = 1 # MClient sleeps for ## sec between retries
|
conf[:MC_RETRY_INTERVAL] = 1 # MClient sleeps for ## sec between retries
|
||||||
conf[:PUPPET_FADE_INTERVAL] = 5 # retry every ## seconds to check puppet state if it was running
|
conf[:PUPPET_FADE_INTERVAL] = 10 # retry every ## seconds to check puppet state if it was running
|
||||||
conf[:PROVISIONING_TIMEOUT] = 90 * 60 # timeout for booting target OS in provision
|
conf[:PROVISIONING_TIMEOUT] = 90 * 60 # timeout for booting target OS in provision
|
||||||
conf[:REBOOT_TIMEOUT] = 120 # how long it can take for node to reboot
|
conf[:REBOOT_TIMEOUT] = 120 # how long it can take for node to reboot
|
||||||
|
|
||||||
|
|
|
@ -91,13 +91,12 @@ module Astute
|
||||||
# but we should turn it on only in error_nodes
|
# but we should turn it on only in error_nodes
|
||||||
succeed_nodes -= hung_nodes
|
succeed_nodes -= hung_nodes
|
||||||
error_nodes = (error_nodes + hung_nodes).uniq
|
error_nodes = (error_nodes + hung_nodes).uniq
|
||||||
running_nodes = last_run.map {|n| n.results[:sender]} - stopped_nodes - hung_nodes
|
running_nodes -= hung_nodes
|
||||||
|
|
||||||
|
|
||||||
nodes_to_check = running_nodes + succeed_nodes + error_nodes
|
nodes_to_check = running_nodes + succeed_nodes + error_nodes
|
||||||
unless nodes_to_check.size == last_run.size
|
all_nodes = last_run.map { |n| n.results[:sender] }
|
||||||
raise "Should never happen. Internal error in nodes statuses calculation. Statuses calculated for: #{nodes_to_check.inspect},"
|
if nodes_to_check.size != all_nodes.size
|
||||||
"nodes passed to check statuses of: #{last_run.map {|n| n.results[:sender]}}"
|
raise "Internal error. Check: #{nodes_to_check.inspect}, passed #{all_nodes.inspect}"
|
||||||
end
|
end
|
||||||
{'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes}
|
{'succeed' => succeed_nodes, 'error' => error_nodes, 'running' => running_nodes}
|
||||||
end
|
end
|
||||||
|
|
|
@ -148,10 +148,12 @@ module MCollective
|
||||||
when 'idling' then # signal daemon
|
when 'idling' then # signal daemon
|
||||||
pid = puppet_agent_pid
|
pid = puppet_agent_pid
|
||||||
begin
|
begin
|
||||||
::Process.kill('USR1', pid)
|
::Process.kill('INT', pid)
|
||||||
reply[:output] = "Signalled daemonized puppet to run (process #{pid}); " + (reply[:output] || '')
|
rescue Errno::ESRCH => e
|
||||||
rescue => ex
|
reply[:err_msg] = "Failed to signal the puppet apply daemon (process #{pid}): #{e}"
|
||||||
reply.fail "Failed to signal the puppet daemon (process #{pid}): #{ex}"
|
ensure
|
||||||
|
runonce_background
|
||||||
|
reply[:output] = "Kill old idling puppet process #{pid})." + (reply[:output] || '')
|
||||||
end
|
end
|
||||||
|
|
||||||
when 'stopped' then # just run
|
when 'stopped' then # just run
|
||||||
|
|
Loading…
Reference in New Issue