Use async shell call for provision
This change allows provision commands to be run through the Puppet-based async shell task. It is a transitional change between the old way of running image provision and provision as a graph, which will also use the async shell to run. It is a more fault-tolerant way to provision, because a temporary connection problem between the master node and the node being provisioned does not block or fail provisioning. Important notice: this is possible only if the bootstrap image has the puppet and daemonize packages, which is true for releases 9.2 and higher. Change-Id: Ie634fae9b63bf0c103ec8926647af75b57cefe23 Related-Bug: #1644618
This commit is contained in:
parent
1dc4d754b7
commit
dc47550460
|
@ -108,6 +108,7 @@ module Astute
|
|||
conf[:graph_dot_dir] = "/var/lib/astute/graphs" # default dir patch for debug graph file
|
||||
conf[:enable_graph_file] = true # enable debug graph records to file
|
||||
conf[:puppet_raw_report] = false # enable puppet detailed report
|
||||
conf[:task_poll_delay] = 1 # sleeps for ## sec between task status calls
|
||||
|
||||
# Server settings
|
||||
conf[:broker_host] = 'localhost'
|
||||
|
|
|
@ -75,19 +75,13 @@ module Astute
|
|||
Astute.logger.debug("#{ctx.task_id}: running provision script: " \
|
||||
"#{uids.join(', ')}")
|
||||
|
||||
results = run_shell_command(
|
||||
failed_uids |= run_shell_task(
|
||||
ctx,
|
||||
uids,
|
||||
'flock -n /var/lock/provision.lock provision',
|
||||
Astute.config.provisioning_timeout
|
||||
)
|
||||
|
||||
results.select{ |_node_id, result| !result }.keys.each do |node_id|
|
||||
failed_uids << node_id
|
||||
Astute.logger.error("#{ctx.task_id}: Provision command returned " \
|
||||
"non zero exit code on node: #{node_id}")
|
||||
end
|
||||
|
||||
failed_uids
|
||||
end
|
||||
|
||||
|
@ -122,24 +116,42 @@ module Astute
|
|||
).process
|
||||
end
|
||||
|
||||
def self.run_shell_command(context, node_uids, cmd, timeout=3600)
|
||||
shell = MClient.new(
|
||||
context,
|
||||
'execute_shell_command',
|
||||
node_uids,
|
||||
check_result=true,
|
||||
timeout=timeout,
|
||||
retries=1
|
||||
)
|
||||
|
||||
results = shell.execute(:cmd => cmd)
|
||||
results.inject({}) do |summary, node|
|
||||
summary.merge(node.results[:sender] => node.results[:data][:exit_code] == 0)
|
||||
# Runs +cmd+ on every node in +node_uids+ through Shell tasks and reports
# which nodes failed.
#
# One Shell task is created per node (see generate_shell_hook), all tasks are
# started, and then the unfinished ones are polled every
# Astute.config.task_poll_delay seconds until every task reports finished?.
#
# @param ctx       context object; only +task_id+ is read here (for log
#                  messages), the object itself is handed to Shell.new
# @param node_uids [Array] uids of the nodes to run the command on
# @param cmd       [String] shell command to execute
# @param timeout   [Integer] timeout stored in each task's parameters
# @return [Array] node ids of the tasks that reported failed?; if anything
#   raises a StandardError, the error is logged and ALL +node_uids+ are
#   returned, i.e. every node is treated as failed
def self.run_shell_task(ctx, node_uids, cmd, timeout=3600)
  shell_tasks = node_uids.map do |node_id|
    Shell.new(generate_shell_hook(node_id, cmd, timeout), ctx)
  end

  shell_tasks.each(&:run)

  while shell_tasks.any? { |t| !t.finished? } do
    # Only poll tasks that are still in progress.
    shell_tasks.select { |t| !t.finished? }.each(&:status)
    sleep Astute.config.task_poll_delay
  end

  # Collect the failed node ids with select/map. The previous
  # `inject([]) do |task|` form bound the accumulator to `task`, raised
  # NoMethodError on the first failed task and, through the rescue below,
  # reported every node as failed.
  failed_uids = shell_tasks.select(&:failed?).map(&:node_id)
  failed_uids.each do |node_id|
    Astute.logger.error("#{ctx.task_id}: Provision command returned " \
      "non zero exit code on node: #{node_id}")
  end

  failed_uids
rescue => e
  Astute.logger.error("#{ctx.task_id}: cmd: #{cmd} " \
    "error: #{e.message}, trace #{e.backtrace}")
  node_uids
end
|
||||
|
||||
# Builds the hash that describes a shell task for a single node; the result
# is what run_shell_task passes to Shell.new.
#
# node_id - uid of the target node; stored under "node_id" and embedded in
#           the task "id" as "provision_<node_id>"
# cmd     - command string stored under parameters "cmd"
# timeout - value stored under parameters "timeout"
#
# "cwd" is always "/" and "retries" is always 0.
# Returns the Hash.
def self.generate_shell_hook(node_id, cmd, timeout)
|
||||
{
|
||||
"node_id" => node_id,
|
||||
"id" => "provision_#{node_id}",
|
||||
"parameters" => {
|
||||
"cmd" => cmd,
|
||||
"cwd" => "/",
|
||||
"timeout" => timeout,
|
||||
"retries" => 0
|
||||
}
|
||||
}
|
||||
end
|
||||
|
||||
end
|
||||
|
|
|
@ -61,7 +61,7 @@ module Astute
|
|||
def sync_run
|
||||
run
|
||||
loop do
|
||||
sleep 1
|
||||
sleep Astute.config.task_poll_delay
|
||||
status
|
||||
break if finished?
|
||||
end
|
||||
|
|
|
@ -31,6 +31,10 @@ module Astute
|
|||
{}
|
||||
end
|
||||
|
||||
# Returns the value stored under the 'node_id' key of @task
# (presumably the task/hook hash this object was built from -- confirm
# against the constructor, which is not shown here).
def node_id
|
||||
@task['node_id']
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
SHELL_MANIFEST_DIR = '/etc/puppet/shell_manifests'
|
||||
|
|
|
@ -102,12 +102,12 @@ describe Astute::ImageProvision do
|
|||
.with([reboot_hook], ctx, 'provision')
|
||||
.returns(nailgun_hook)
|
||||
nailgun_hook.expects(:process).once
|
||||
provisioner.reboot(ctx, node_ids, task_id="reboot_provisioned_nodes")
|
||||
provisioner.reboot(ctx, node_ids, _task_id="reboot_provisioned_nodes")
|
||||
end
|
||||
|
||||
it 'should not run hook if no nodes present' do
|
||||
Astute::NailgunHooks.expects(:new).never
|
||||
provisioner.reboot(ctx, [], task_id="reboot_provisioned_nodes")
|
||||
provisioner.reboot(ctx, [], _task_id="reboot_provisioned_nodes")
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -184,24 +184,28 @@ describe Astute::ImageProvision do
|
|||
end
|
||||
|
||||
describe ".run_provision" do
|
||||
it 'should run provision on nodes using shell magent' do
|
||||
provisioner.expects(:run_shell_command).once.with(
|
||||
ctx,
|
||||
nodes.map { |n| n['uid'] },
|
||||
'flock -n /var/lock/provision.lock provision',
|
||||
Astute.config.provisioning_timeout
|
||||
).returns({5 => true, 6 => true})
|
||||
before do
|
||||
provisioner.stubs(:sleep)
|
||||
end
|
||||
|
||||
provisioner.run_provision(ctx, nodes.map { |n| n['uid'] }, [])
|
||||
it 'should run provision on nodes using shell magent' do
|
||||
Astute::Shell.any_instance.stubs(:process)
|
||||
Astute::Shell.any_instance.expects(:run).once
|
||||
Astute::Shell.any_instance.expects(:finished?).times(3)
|
||||
.returns(false).
|
||||
then.returns(true)
|
||||
Astute::Shell.any_instance.expects(:failed?).once.returns(false)
|
||||
|
||||
provisioner.run_provision(ctx, [5], [])
|
||||
end
|
||||
|
||||
it 'should run return failed nodes' do
|
||||
provisioner.stubs(:run_shell_command).once.returns({5 => true, 6 => false})
|
||||
provisioner.stubs(:run_shell_task).once.returns([6])
|
||||
expect(provisioner.run_provision(ctx, nodes.map { |n| n['uid'] }, [])).to eql([6])
|
||||
end
|
||||
|
||||
it 'should not erase info about alread failed nodes' do
|
||||
provisioner.stubs(:run_shell_command).once.returns({5 => true, 6 => false})
|
||||
provisioner.stubs(:run_shell_task).once.returns([6])
|
||||
failed_uids = [3]
|
||||
expect(provisioner.run_provision(
|
||||
ctx,
|
||||
|
@ -212,4 +216,3 @@ describe Astute::ImageProvision do
|
|||
end
|
||||
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in New Issue