Enable SR-IOV device plugin restart
In an AIO system, it is possible for the kube-system pods, including the SR-IOV device plugin, to start before the worker manifest finishes enabling and binding drivers to network interface VFs. Since the device plugin does not periodically (re)scan the PCI bus, the plugin must be restarted after the SR-IOV driver bind completes to ensure that the full allocatable set of VFs is inventoried. Note that this can probably be mitigated in the future when the device plugin is converted to use helm / config map rather than having puppet write its /etc/sriovdp/config.json file. Change-Id: I7972d7a56c2d38884238f7c7818892d0a5b33a0e Closes-Bug: #1885229 Signed-off-by: Steven Webster <steven.webster@windriver.com>
This commit is contained in:
parent
c30274e7ad
commit
ca6546f562
|
@ -117,21 +117,10 @@ define platform::devices::sriov_pf_enable (
|
|||
|
||||
# Binds the FEC device VFs to their drivers and orders that binding ahead of
# the SR-IOV device plugin class.
#
# The device plugin restart is intentionally NOT declared here. The
# 'Restart sriovdp daemonset' exec is owned by
# platform::kubernetes::worker::sriovdp, which this class includes; declaring
# a second exec with the same title in this class would be a duplicate
# resource declaration whenever both end up in the catalog.
#
# NOTE(review): the restart is therefore gated by the conditions inside
# platform::kubernetes::worker::sriovdp (controller personality, worker
# subfunction, 'sriovdp' host label) rather than by the size of
# $device_config.
class platform::devices::fpga::fec::vf
  inherits ::platform::devices::fpga::fec::params {

  include ::platform::kubernetes::worker::sriovdp

  # The PF class must be fully applied before any VF work starts.
  require ::platform::devices::fpga::fec::pf

  # One platform::devices::sriov_vf_bind resource per $device_config entry.
  # $device_config is presumably provided by the inherited params class --
  # TODO confirm.
  create_resources('platform::devices::sriov_vf_bind', $device_config, {})

  # Every VF bind must complete before the sriovdp class is applied, so the
  # device plugin is restarted only after the VFs are bound to a driver.
  Platform::Devices::Sriov_vf_bind <| |> -> Class['platform::kubernetes::worker::sriovdp']
}
|
||||
|
||||
class platform::devices::fpga::fec::pf
|
||||
|
|
|
@ -374,6 +374,7 @@ class platform::kubernetes::worker::pci
|
|||
$pcidp_resources = undef,
|
||||
) {
|
||||
include ::platform::kubernetes::params
|
||||
include ::platform::kubernetes::worker::sriovdp
|
||||
|
||||
file { '/etc/pcidp':
|
||||
ensure => 'directory',
|
||||
|
@ -390,6 +391,25 @@ class platform::kubernetes::worker::pci
|
|||
}
|
||||
}
|
||||
|
||||
# Restarts the SR-IOV device plugin daemonset.
#
# The restart is declared only when all of the following hold:
#   * the host personality is 'controller',
#   * the is_worker_subfunction fact is true,
#   * 'sriovdp' is among the host labels from platform::kubernetes::params.
#
# Rationale: on an AIO system the device plugin pods can come up before the
# device VFs have been bound to a driver. Rolling the daemonset makes the
# pods re-scan the device ids/drivers listed in /etc/pcidp/config.json.
# Moving the device plugin to helm + configmap may make this unnecessary.
class platform::kubernetes::worker::sriovdp {
  include ::platform::kubernetes::params

  $host_labels = $::platform::kubernetes::params::host_labels

  # Best-effort: the trailing '|| true' keeps a failed rollout from failing
  # the exec.
  $restart_command = 'kubectl --kubeconfig=/etc/kubernetes/admin.conf rollout restart ds -n kube-system kube-sriov-device-plugin-amd64 || true' # lint:ignore:140chars

  # Nested checks keep the original left-to-right short-circuit order.
  if $::personality == 'controller' {
    if str2bool($::is_worker_subfunction) {
      if 'sriovdp' in $host_labels {
        exec { 'Restart sriovdp daemonset':
          command   => $restart_command,
          path      => '/usr/bin:/usr/sbin:/bin',
          logoutput => true,
        }
      }
    }
  }
}
|
||||
|
||||
class platform::kubernetes::worker
|
||||
inherits ::platform::kubernetes::params {
|
||||
|
|
|
@ -239,6 +239,7 @@ class platform::interfaces::sriov (
|
|||
create_resources('platform::interfaces::sriov_enable', $sriov_config, {})
|
||||
} else {
|
||||
create_resources('platform::interfaces::sriov_vf_bind', $sriov_config, {})
|
||||
Platform::Interfaces::Sriov_vf_bind <| |> -> Class['::platform::kubernetes::worker::sriovdp']
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue