Poweroff server after 10 tries

Some faulty iPXE firmware cannot load the kernel or initrd file and retries
without limit, causing a DoS on the HTTP server (especially when the
environment has hundreds of faulty nodes). This patch changes the behavior
to power off the hardware node after 10 unsuccessful retries to get the
kernel or initrd over HTTP.

Story: #2002928
Task: #22915
Change-Id: Iec4650499c51c4c7dac38c279728d294ed3434b6
Co-Authored-By: Julia Kreger <juliaashleykreger@gmail.com>
This commit is contained in:
Sergii Golovatiuk 2018-06-27 19:00:52 +02:00 committed by Julia Kreger
parent ea302a8cd5
commit fe2608d00a
6 changed files with 100 additions and 11 deletions

View File

@ -1,14 +1,30 @@
#!ipxe
set attempts:int32 10
set i:int32 0
goto deploy
:deploy
imgfree
kernel {% if pxe_options.ipxe_timeout > 0 %}--timeout {{ pxe_options.ipxe_timeout }} {% endif %}{{ pxe_options.deployment_aki_path }} selinux=0 troubleshoot=0 text {{ pxe_options.pxe_append_params|default("", true) }} BOOTIF=${mac} ipa-api-url={{ pxe_options['ipa-api-url'] }} initrd={{ pxe_options.initrd_filename|default("deploy_ramdisk", true) }} coreos.configdrive=0 || goto deploy
kernel {% if pxe_options.ipxe_timeout > 0 %}--timeout {{ pxe_options.ipxe_timeout }} {% endif %}{{ pxe_options.deployment_aki_path }} selinux=0 troubleshoot=0 text {{ pxe_options.pxe_append_params|default("", true) }} BOOTIF=${mac} ipa-api-url={{ pxe_options['ipa-api-url'] }} initrd={{ pxe_options.initrd_filename|default("deploy_ramdisk", true) }} coreos.configdrive=0 || goto retry
initrd {% if pxe_options.ipxe_timeout > 0 %}--timeout {{ pxe_options.ipxe_timeout }} {% endif %}{{ pxe_options.deployment_ari_path }} || goto deploy
initrd {% if pxe_options.ipxe_timeout > 0 %}--timeout {{ pxe_options.ipxe_timeout }} {% endif %}{{ pxe_options.deployment_ari_path }} || goto retry
boot
:retry
# Give up once the retry counter has reached the limit; the trailing "||"
# keeps the script running when the comparison is false.
iseq ${i} ${attempts} && goto fail ||
inc i
echo No response, retrying in ${i} seconds.
# Linear backoff: wait one second longer after each failed attempt.
sleep ${i}
goto deploy
:fail
echo Failed to get a response after ${attempts} attempts
echo Powering off in 30 seconds.
# Leave the message on the console for a while before shutting down.
sleep 30
poweroff
:boot_partition
imgfree
kernel {% if pxe_options.ipxe_timeout > 0 %}--timeout {{ pxe_options.ipxe_timeout }} {% endif %}{{ pxe_options.aki_path }} root={{ ROOT }} ro text {{ pxe_options.pxe_append_params|default("", true) }} initrd=ramdisk || goto boot_partition
@ -39,4 +55,4 @@ goto boot_iscsi
{%- endif %}
:boot_whole_disk
sanboot --no-describe
sanboot --no-describe

View File

@ -1,14 +1,30 @@
#!ipxe
set attempts:int32 10
set i:int32 0
goto deploy
:deploy
imgfree
kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto deploy
kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto retry
initrd http://1.2.3.4:1234/deploy_ramdisk || goto deploy
initrd http://1.2.3.4:1234/deploy_ramdisk || goto retry
boot
:retry
# Give up once the retry counter has reached the limit; the trailing "||"
# keeps the script running when the comparison is false.
iseq ${i} ${attempts} && goto fail ||
inc i
echo No response, retrying in ${i} seconds.
# Linear backoff: wait one second longer after each failed attempt.
sleep ${i}
goto deploy
:fail
echo Failed to get a response after ${attempts} attempts
echo Powering off in 30 seconds.
# Leave the message on the console for a while before shutting down.
sleep 30
poweroff
:boot_partition
imgfree
kernel http://1.2.3.4:1234/kernel root={{ ROOT }} ro text test_param initrd=ramdisk || goto boot_partition

View File

@ -1,14 +1,30 @@
#!ipxe
set attempts:int32 10
set i:int32 0
goto deploy
:deploy
imgfree
kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto deploy
kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto retry
initrd http://1.2.3.4:1234/deploy_ramdisk || goto deploy
initrd http://1.2.3.4:1234/deploy_ramdisk || goto retry
boot
:retry
# Give up once the retry counter has reached the limit; the trailing "||"
# keeps the script running when the comparison is false.
iseq ${i} ${attempts} && goto fail ||
inc i
echo No response, retrying in ${i} seconds.
# Linear backoff: wait one second longer after each failed attempt.
sleep ${i}
goto deploy
:fail
echo Failed to get a response after ${attempts} attempts
echo Powering off in 30 seconds.
# Leave the message on the console for a while before shutting down.
sleep 30
poweroff
:boot_partition
imgfree
kernel http://1.2.3.4:1234/kernel root={{ ROOT }} ro text test_param initrd=ramdisk || goto boot_partition

View File

@ -1,14 +1,30 @@
#!ipxe
set attempts:int32 10
set i:int32 0
goto deploy
:deploy
imgfree
kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto deploy
kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto retry
initrd http://1.2.3.4:1234/deploy_ramdisk || goto deploy
initrd http://1.2.3.4:1234/deploy_ramdisk || goto retry
boot
:retry
# Give up once the retry counter has reached the limit; the trailing "||"
# keeps the script running when the comparison is false.
iseq ${i} ${attempts} && goto fail ||
inc i
echo No response, retrying in ${i} seconds.
# Linear backoff: wait one second longer after each failed attempt.
sleep ${i}
goto deploy
:fail
echo Failed to get a response after ${attempts} attempts
echo Powering off in 30 seconds.
# Leave the message on the console for a while before shutting down.
sleep 30
poweroff
:boot_partition
imgfree
kernel http://1.2.3.4:1234/kernel root={{ ROOT }} ro text test_param initrd=ramdisk || goto boot_partition

View File

@ -1,14 +1,30 @@
#!ipxe
set attempts:int32 10
set i:int32 0
goto deploy
:deploy
imgfree
kernel --timeout 120 http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto deploy
kernel --timeout 120 http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto retry
initrd --timeout 120 http://1.2.3.4:1234/deploy_ramdisk || goto deploy
initrd --timeout 120 http://1.2.3.4:1234/deploy_ramdisk || goto retry
boot
:retry
# Give up once the retry counter has reached the limit; the trailing "||"
# keeps the script running when the comparison is false.
iseq ${i} ${attempts} && goto fail ||
inc i
echo No response, retrying in ${i} seconds.
# Linear backoff: wait one second longer after each failed attempt.
sleep ${i}
goto deploy
:fail
echo Failed to get a response after ${attempts} attempts
echo Powering off in 30 seconds.
# Leave the message on the console for a while before shutting down.
sleep 30
poweroff
:boot_partition
imgfree
kernel --timeout 120 http://1.2.3.4:1234/kernel root={{ ROOT }} ro text test_param initrd=ramdisk || goto boot_partition

View File

@ -0,0 +1,9 @@
---
fixes:
- |
Changes the iPXE behavior to retry a total of 10 times with an increasing
backoff time between each retry in order to not create a Denial of Service
situation with the iPXE HTTP server. Should the retries fail, the node will
be powered-off after a warning is displayed on the console for 30 seconds.
For more information, see
`story <https://storyboard.openstack.org/#!/story/2002928>`_.