Update erase_node deletion process

When mcollective is launched via Upstart, the current implementation of the filesystem erase code is not being executed as part of the SIGTERM handler that is called when we attempt to SIGTERM all processes. This change pulls the majority of the code we use to erase a system out of a child thread into the main agent action. This change leaves the reboot in a child process and SIGTERM handler so that astute will get the response for the erase_node command and will mark it complete. An added benefit of this change is that the debug messages are now returned back to astute and show up in the astute.log where previously they were being silently discarded. Change-Id: Ib9ee7db0a4d872ea5d1485ec036940f12b0d251e Closes-Bug: 1461074
2015-06-03 15:13:08 -05:00 · 2015-06-03 15:13:08 -05:00 · 1b8588b6ea
parent cbae24e990
commit 1b8588b6ea
1 changed files with 34 additions and 34 deletions
--- a/mcagents/erase_node.rb
+++ b/mcagents/erase_node.rb
@ -100,43 +100,43 @@ module MCollective
      end

      def reboot
+        debug_msg("Run node rebooting command using 'SB' to sysrq-trigger")
+        File.open('/proc/sys/kernel/sysrq','w') { |file| file.write("1\n") }
+        # turning panic on oops and setting panic timeout to 10
+        File.open('/proc/sys/kernel/panic_on_oops', 'w') {|file| file.write("1\n")}
+        File.open('/proc/sys/kernel/panic','w') {|file| file.write("10\n")}
+
+        #Setting RO for all file systems
+        ['s', 'u'].each do |req|
+          File.open('/proc/sysrq-trigger','w') do |file|
+            file.write("#{req}\n")
+          end
+        end
+
+        begin
+          # Delete data disks first, then OS drive to address bug #1437511
+          (get_devices(type='data') + get_devices(type='root')).each do |dev|
+            debug_msg("erasing #{dev[:name]}")
+            erase_partitions(dev[:name])
+            erase_data(dev[:name])
+            erase_data(dev[:name], 1, dev[:size], '512')
+          end
+
+          reply[:erased] = true
+        rescue Exception => e
+          reply[:erased] = false
+          reply[:status] += 1
+          msg = "MBR can't be erased. Reason: #{e.message};"
+          Log.error(msg)
+          error_msg << msg
+        end
+
+        # It should be noted that this is here so that astute will get a reply
+        # from the deletion task. If it does not get a reply, the deletion may
+        # fail. LP#1279720
        pid = fork do
-          debug_msg("Run node rebooting command using 'SB' to sysrq-trigger")
-          sleep 5
-          File.open('/proc/sys/kernel/sysrq','w') { |file| file.write("1\n") }
-          # turning panic on oops and setting panic timeout to 10
-          File.open('/proc/sys/kernel/panic_on_oops', 'w') {|file| file.write("1\n")}
-          File.open('/proc/sys/kernel/panic','w') {|file| file.write("10\n")}
-
          trap('SIGTERM') do
-            #Giving 5 seconds for other process to die nicely
            sleep 5
-
-            #Setting RO for all file systems
-            ['s', 'u'].each do |req|
-              File.open('/proc/sysrq-trigger','w') do |file|
-                file.write("#{req}\n")
-              end
-            end
-
-            begin
-              # Delete data disks first, then OS drive to address bug #1437511
-              (get_devices(type='data') + get_devices(type='root')).each do |dev|
-                debug_msg("erasing #{dev[:name]}")
-                erase_partitions(dev[:name])
-                erase_data(dev[:name])
-                erase_data(dev[:name], 1, dev[:size], '512')
-              end
-
-              reply[:erased] = true
-            rescue Exception => e
-              reply[:erased] = false
-              reply[:status] += 1
-              msg = "MBR can't be erased. Reason: #{e.message};"
-              Log.error(msg)
-              error_msg << msg
-            end
-
            # Reboot the system
            ['b'].each do |req|
              File.open('/proc/sysrq-trigger','w') do |file|