Use async shell call for provision
This change allows provision commands to be run through the
puppet-based async shell task.
It is a transitional change between the old way of running image
provisioning and provisioning as a graph, which will also
use the async shell to run.
It is a more fault-tolerant way to provision because
temporary connection problems between the master node
and the node being provisioned do not block or fail provisioning.
Important notice: this is allowed only if the bootstrap image
has the puppet and daemonize packages, which is true for
releases 9.2 and higher.
Change-Id: Ie634fae9b63bf0c103ec8926647af75b57cefe23
Related-Bug: #1644618
(cherry picked from commit dc47550460)
This commit is contained in:
parent
1b86a47c9c
commit
9d7ba716fc
|
@ -108,6 +108,7 @@ module Astute
|
|||
conf[:graph_dot_dir] = "/var/lib/astute/graphs" # default dir patch for debug graph file
|
||||
conf[:enable_graph_file] = true # enable debug graph records to file
|
||||
conf[:puppet_raw_report] = false # enable puppet detailed report
|
||||
conf[:task_poll_delay] = 1 # sleeps for ## sec between task status calls
|
||||
|
||||
# Server settings
|
||||
conf[:broker_host] = 'localhost'
|
||||
|
|
|
@ -75,19 +75,13 @@ module Astute
|
|||
Astute.logger.debug("#{ctx.task_id}: running provision script: " \
|
||||
"#{uids.join(', ')}")
|
||||
|
||||
results = run_shell_command(
|
||||
failed_uids |= run_shell_task(
|
||||
ctx,
|
||||
uids,
|
||||
'flock -n /var/lock/provision.lock provision',
|
||||
Astute.config.provisioning_timeout
|
||||
)
|
||||
|
||||
results.select{ |_node_id, result| !result }.keys.each do |node_id|
|
||||
failed_uids << node_id
|
||||
Astute.logger.error("#{ctx.task_id}: Provision command returned " \
|
||||
"non zero exit code on node: #{node_id}")
|
||||
end
|
||||
|
||||
failed_uids
|
||||
end
|
||||
|
||||
|
@ -122,24 +116,42 @@ module Astute
|
|||
).process
|
||||
end
|
||||
|
||||
def self.run_shell_command(context, node_uids, cmd, timeout=3600)
|
||||
shell = MClient.new(
|
||||
context,
|
||||
'execute_shell_command',
|
||||
node_uids,
|
||||
check_result=true,
|
||||
timeout=timeout,
|
||||
retries=1
|
||||
)
|
||||
|
||||
results = shell.execute(:cmd => cmd)
|
||||
results.inject({}) do |summary, node|
|
||||
summary.merge(node.results[:sender] => node.results[:data][:exit_code] == 0)
|
||||
# Runs +cmd+ on every node in +node_uids+ through one Shell task per node
# and polls the tasks until all of them report finished.
#
# @param ctx context handed to every Shell task; its +task_id+ prefixes log messages
# @param node_uids [Array] ids of the nodes the command is run on
# @param cmd [String] shell command to execute
# @param timeout [Integer] timeout stored in each generated shell hook
# @return [Array] ids of the nodes whose task reports +failed?+; every id in
#   +node_uids+ when an error is raised while creating, running or polling tasks
def self.run_shell_task(ctx, node_uids, cmd, timeout=3600)
  shell_tasks = node_uids.map do |node_id|
    Shell.new(generate_shell_hook(node_id, cmd, timeout), ctx)
  end

  shell_tasks.each(&:run)

  # Poll only the tasks that are still in progress, pausing between rounds.
  while shell_tasks.any? { |t| !t.finished? }
    shell_tasks.reject(&:finished?).each(&:status)
    sleep Astute.config.task_poll_delay
  end

  # Collect the failed node ids with select/map. The previous
  # `inject([]) do |task|` bound the accumulator (not the task) to the block
  # parameter, so any failure raised and was reported as "all nodes failed".
  shell_tasks.select(&:failed?).map do |task|
    Astute.logger.error("#{ctx.task_id}: Provision command returned " \
      "non zero exit code on node: #{task.node_id}")
    task.node_id
  end
rescue => e
  Astute.logger.error("#{ctx.task_id}: cmd: #{cmd} " \
    "error: #{e.message}, trace #{e.backtrace}")
  # The outcome is unknown, so treat every requested node as failed.
  node_uids
end
|
||||
|
||||
# Builds the hook hash describing one shell task for a single node.
#
# @param node_id id of the node; stored under "node_id" and embedded in "id"
# @param cmd command stored under "parameters" -> "cmd"
# @param timeout value stored under "parameters" -> "timeout"
# @return [Hash] hook with string keys "node_id", "id" and "parameters"
def self.generate_shell_hook(node_id, cmd, timeout)
  parameters = {
    "cmd" => cmd,
    "cwd" => "/",
    "timeout" => timeout,
    "retries" => 0
  }

  { "node_id" => node_id, "id" => "provision_#{node_id}", "parameters" => parameters }
end
|
||||
|
||||
end
|
||||
|
|
|
@ -61,7 +61,7 @@ module Astute
|
|||
def sync_run
|
||||
run
|
||||
loop do
|
||||
sleep 1
|
||||
sleep Astute.config.task_poll_delay
|
||||
status
|
||||
break if finished?
|
||||
end
|
||||
|
|
|
@ -31,6 +31,10 @@ module Astute
|
|||
{}
|
||||
end
|
||||
|
||||
# Returns the value stored under the 'node_id' key of the @task hash.
def node_id
  @task["node_id"]
end
|
||||
|
||||
private
|
||||
|
||||
SHELL_MANIFEST_DIR = '/etc/puppet/shell_manifests'
|
||||
|
|
|
@ -102,12 +102,12 @@ describe Astute::ImageProvision do
|
|||
.with([reboot_hook], ctx, 'provision')
|
||||
.returns(nailgun_hook)
|
||||
nailgun_hook.expects(:process).once
|
||||
provisioner.reboot(ctx, node_ids, task_id="reboot_provisioned_nodes")
|
||||
provisioner.reboot(ctx, node_ids, _task_id="reboot_provisioned_nodes")
|
||||
end
|
||||
|
||||
it 'should not run hook if no nodes present' do
|
||||
Astute::NailgunHooks.expects(:new).never
|
||||
provisioner.reboot(ctx, [], task_id="reboot_provisioned_nodes")
|
||||
provisioner.reboot(ctx, [], _task_id="reboot_provisioned_nodes")
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -184,24 +184,28 @@ describe Astute::ImageProvision do
|
|||
end
|
||||
|
||||
describe ".run_provision" do
|
||||
it 'should run provision on nodes using shell magent' do
|
||||
provisioner.expects(:run_shell_command).once.with(
|
||||
ctx,
|
||||
nodes.map { |n| n['uid'] },
|
||||
'flock -n /var/lock/provision.lock provision',
|
||||
Astute.config.provisioning_timeout
|
||||
).returns({5 => true, 6 => true})
|
||||
before do
|
||||
provisioner.stubs(:sleep)
|
||||
end
|
||||
|
||||
provisioner.run_provision(ctx, nodes.map { |n| n['uid'] }, [])
|
||||
it 'should run provision on nodes using shell magent' do
|
||||
Astute::Shell.any_instance.stubs(:process)
|
||||
Astute::Shell.any_instance.expects(:run).once
|
||||
Astute::Shell.any_instance.expects(:finished?).times(3)
|
||||
.returns(false).
|
||||
then.returns(true)
|
||||
Astute::Shell.any_instance.expects(:failed?).once.returns(false)
|
||||
|
||||
provisioner.run_provision(ctx, [5], [])
|
||||
end
|
||||
|
||||
it 'should run return failed nodes' do
|
||||
provisioner.stubs(:run_shell_command).once.returns({5 => true, 6 => false})
|
||||
provisioner.stubs(:run_shell_task).once.returns([6])
|
||||
expect(provisioner.run_provision(ctx, nodes.map { |n| n['uid'] }, [])).to eql([6])
|
||||
end
|
||||
|
||||
it 'should not erase info about alread failed nodes' do
|
||||
provisioner.stubs(:run_shell_command).once.returns({5 => true, 6 => false})
|
||||
provisioner.stubs(:run_shell_task).once.returns([6])
|
||||
failed_uids = [3]
|
||||
expect(provisioner.run_provision(
|
||||
ctx,
|
||||
|
@ -212,4 +216,3 @@ describe Astute::ImageProvision do
|
|||
end
|
||||
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in New Issue