Update erase_node deletion process

When mcollective is launched via Upstart, the current implementation
of the filesystem erase code is not being executed as part of the
SIGTERM handler that is called when we attempt to SIGTERM all
processes. This change pulls the majority of the code we use to
erase a system out of a child thread into the main agent action. This
change leaves the reboot in a child process and SIGTERM handler so
that astute will get the response for the erase_node command and will
mark it complete. An added benefit of this change is that the debug
messages are now returned back to astute and show up in the astute.log
where previously they were being silently discarded.

Change-Id: Ib9ee7db0a4d872ea5d1485ec036940f12b0d251e
Closes-Bug: 1461074
This commit is contained in:
Alex Schultz 2015-06-03 15:13:08 -05:00
parent cbae24e990
commit 1b8588b6ea
1 changed files with 34 additions and 34 deletions

View File

@ -100,43 +100,43 @@ module MCollective
end
def reboot
debug_msg("Run node rebooting command using 'SB' to sysrq-trigger")
File.open('/proc/sys/kernel/sysrq','w') { |file| file.write("1\n") }
# turning panic on oops and setting panic timeout to 10
File.open('/proc/sys/kernel/panic_on_oops', 'w') {|file| file.write("1\n")}
File.open('/proc/sys/kernel/panic','w') {|file| file.write("10\n")}
#Setting RO for all file systems
['s', 'u'].each do |req|
File.open('/proc/sysrq-trigger','w') do |file|
file.write("#{req}\n")
end
end
begin
# Delete data disks first, then OS drive to address bug #1437511
(get_devices(type='data') + get_devices(type='root')).each do |dev|
debug_msg("erasing #{dev[:name]}")
erase_partitions(dev[:name])
erase_data(dev[:name])
erase_data(dev[:name], 1, dev[:size], '512')
end
reply[:erased] = true
rescue Exception => e
reply[:erased] = false
reply[:status] += 1
msg = "MBR can't be erased. Reason: #{e.message};"
Log.error(msg)
error_msg << msg
end
# It should be noted that this is here so that astute will get a reply
# from the deletion task. If it does not get a reply, the deletion may
# fail. LP#1279720
pid = fork do
debug_msg("Run node rebooting command using 'SB' to sysrq-trigger")
sleep 5
File.open('/proc/sys/kernel/sysrq','w') { |file| file.write("1\n") }
# turning panic on oops and setting panic timeout to 10
File.open('/proc/sys/kernel/panic_on_oops', 'w') {|file| file.write("1\n")}
File.open('/proc/sys/kernel/panic','w') {|file| file.write("10\n")}
trap('SIGTERM') do
#Giving 5 seconds for other process to die nicely
sleep 5
#Setting RO for all file systems
['s', 'u'].each do |req|
File.open('/proc/sysrq-trigger','w') do |file|
file.write("#{req}\n")
end
end
begin
# Delete data disks first, then OS drive to address bug #1437511
(get_devices(type='data') + get_devices(type='root')).each do |dev|
debug_msg("erasing #{dev[:name]}")
erase_partitions(dev[:name])
erase_data(dev[:name])
erase_data(dev[:name], 1, dev[:size], '512')
end
reply[:erased] = true
rescue Exception => e
reply[:erased] = false
reply[:status] += 1
msg = "MBR can't be erased. Reason: #{e.message};"
Log.error(msg)
error_msg << msg
end
# Reboot the system
['b'].each do |req|
File.open('/proc/sysrq-trigger','w') do |file|