Enable SR-IOV device plugin restart

In an AIO system, it is possible for the kube-system pods, including
the SR-IOV device plugin, to start before the worker manifest finishes
enabling and binding drivers to network interface VFs.

Since the device plugin does not periodically (re)scan the PCI bus,
it is required to restart the plugin after completing the SR-IOV
driver bind to ensure that the full allocatable set of VFs is
inventoried.

Note that this can probably be mitigated in the future when the
device plugin is converted to use helm / config map rather than
having puppet write its /etc/pcidp/config.json file.

Change-Id: I7972d7a56c2d38884238f7c7818892d0a5b33a0e
Closes-Bug: #1885229
Signed-off-by: Steven Webster <steven.webster@windriver.com>
This commit is contained in:
Steven Webster 2020-06-26 14:05:48 -04:00
parent c30274e7ad
commit ca6546f562
3 changed files with 23 additions and 13 deletions

View File

@ -117,21 +117,10 @@ define platform::devices::sriov_pf_enable (
# Binds the FPGA FEC device VFs to their drivers.
#
# One platform::devices::sriov_vf_bind resource is declared per entry in
# $device_config (not defined in this class; presumably supplied by the
# inherited ::platform::devices::fpga::fec::params -- confirm there).
#
# The SR-IOV device plugin restart is NOT declared here. It lives in
# ::platform::kubernetes::worker::sriovdp, which this class includes.
# Declaring an exec with the same title ('Restart sriovdp daemonset') in both
# places is a duplicate resource declaration and breaks catalog compilation
# whenever both guards are true.
class platform::devices::fpga::fec::vf
  inherits ::platform::devices::fpga::fec::params {

  include ::platform::kubernetes::worker::sriovdp
  require ::platform::devices::fpga::fec::pf

  create_resources('platform::devices::sriov_vf_bind', $device_config, {})

  # Every VF bind must be applied before the sriovdp class, so that the
  # device plugin restart it contains runs only after the VFs are bound
  # to their drivers.
  Platform::Devices::Sriov_vf_bind <| |> -> Class['platform::kubernetes::worker::sriovdp']
}
class platform::devices::fpga::fec::pf

View File

@ -374,6 +374,7 @@ class platform::kubernetes::worker::pci
$pcidp_resources = undef,
) {
include ::platform::kubernetes::params
include ::platform::kubernetes::worker::sriovdp
file { '/etc/pcidp':
ensure => 'directory',
@ -390,6 +391,25 @@ class platform::kubernetes::worker::pci
}
}
# Restarts the SR-IOV device plugin daemonset once per catalog run on hosts
# that are controllers with the worker subfunction and carry the 'sriovdp'
# host label.
#
# Other classes order their VF bind resources before this class so that the
# restart happens after the drivers are bound.
class platform::kubernetes::worker::sriovdp {
  include ::platform::kubernetes::params

  $labels = $::platform::kubernetes::params::host_labels

  # The guards are nested in the original left-to-right order so that
  # str2bool() is only evaluated on controllers, as before.
  if $::personality == 'controller' {
    if str2bool($::is_worker_subfunction) and ('sriovdp' in $labels) {
      # On an AIO system the device plugin pods can come up before the device
      # VFs have been bound to a driver. The plugin does not rescan on its
      # own, so its pods are restarted to make them re-read the matching
      # device ids/drivers listed in /etc/pcidp/config.json.
      # Moving the plugin to helm + configmap may make this unnecessary.
      # The trailing '|| true' keeps a failed restart from failing the run.
      exec { 'Restart sriovdp daemonset':
        command   => 'kubectl --kubeconfig=/etc/kubernetes/admin.conf rollout restart ds -n kube-system kube-sriov-device-plugin-amd64 || true', # lint:ignore:140chars
        path      => '/usr/bin:/usr/sbin:/bin',
        logoutput => true,
      }
    }
  }
}
class platform::kubernetes::worker
inherits ::platform::kubernetes::params {

View File

@ -239,6 +239,7 @@ class platform::interfaces::sriov (
create_resources('platform::interfaces::sriov_enable', $sriov_config, {})
} else {
create_resources('platform::interfaces::sriov_vf_bind', $sriov_config, {})
Platform::Interfaces::Sriov_vf_bind <| |> -> Class['::platform::kubernetes::worker::sriovdp']
}
}