diff --git a/ansible/inventory/group_vars/all/kolla b/ansible/inventory/group_vars/all/kolla
index 9efe74e47..93c7a895c 100644
--- a/ansible/inventory/group_vars/all/kolla
+++ b/ansible/inventory/group_vars/all/kolla
@@ -362,6 +362,9 @@ kolla_overcloud_inventory_top_level_group_map:
   compute:
     groups:
       - compute
+  compute-vgpu:
+    groups:
+      - compute-vgpu
   monitoring:
     groups:
       - monitoring
diff --git a/ansible/inventory/group_vars/all/vgpu b/ansible/inventory/group_vars/all/vgpu
new file mode 100644
index 000000000..43c828fc0
--- /dev/null
+++ b/ansible/inventory/group_vars/all/vgpu
@@ -0,0 +1,16 @@
+---
+################################################################################
+# VGPU configuration.
+
+# URL pointing to location of GRID driver. Examples are:
+# "file://path/on/ansible/control/host"
+# "http://webserver/NVIDIA-GRID-Linux-KVM-525.105.14-525.105.17-528.89.zip"
+vgpu_driver_url: ""
+
+# Flag to control whether the vGPU playbook should automatically reboot the
+# hypervisor. Note: this is necessary for the driver to be loaded correctly.
+# Caution should be used when changing this option.
+vgpu_do_reboot: true
+
+# Time to wait when rebooting the host before failing.
+vgpu_reboot_timeout: 600
diff --git a/ansible/overcloud-host-configure.yml b/ansible/overcloud-host-configure.yml
index 1fa8383cf..e203c8dac 100644
--- a/ansible/overcloud-host-configure.yml
+++ b/ansible/overcloud-host-configure.yml
@@ -31,3 +31,4 @@
 - import_playbook: "apparmor-libvirt.yml"
 - import_playbook: "swift-block-devices.yml"
 - import_playbook: "compute-libvirt-host.yml"
+- import_playbook: "vgpu.yml"
diff --git a/ansible/vgpu.yml b/ansible/vgpu.yml
new file mode 100644
index 000000000..ebda83fa6
--- /dev/null
+++ b/ansible/vgpu.yml
@@ -0,0 +1,42 @@
+---
+- name: Configure IOMMU
+  hosts: iommu
+  tags:
+    - iommu
+    - vgpu
+  tasks:
+    - import_role:
+        name: stackhpc.linux.iommu
+  handlers:
+    - name: Register that a reboot is required
+      set_fact:
+        kayobe_needs_reboot: true
+      listen: reboot
+
+- name: Configure NVIDIA VGPUs
+  hosts: vgpu
+  tags:
+    - vgpu
+  tasks:
+    - import_role:
+        name: stackhpc.linux.vgpu
+  handlers:
+    - name: Register that a reboot is required
+      set_fact:
+        kayobe_needs_reboot: true
+      listen: reboot
+
+- name: Reboot when required
+  hosts: iommu:vgpu
+  tags:
+    - reboot
+    - iommu
+    - vgpu
+  tasks:
+    - name: Reboot
+      reboot:
+        reboot_timeout: "{{ vgpu_reboot_timeout }}"
+      become: true
+      when:
+        - kayobe_needs_reboot | default(false) | bool
+        - vgpu_do_reboot | bool
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 9ed27550f..3c150fec6 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -127,5 +127,6 @@ openstackdocs_projects = [
     'ironic-python-agent',
     'kolla',
     'kolla-ansible',
+    'nova',
     'neutron',
 ]
diff --git a/doc/source/configuration/reference/index.rst b/doc/source/configuration/reference/index.rst
index 100b7ae84..f88c1f03a 100644
--- a/doc/source/configuration/reference/index.rst
+++ b/doc/source/configuration/reference/index.rst
@@ -23,4 +23,5 @@ options.
    docker-registry
    seed-custom-containers
    infra-vms
+   vgpu
    nova-cells
diff --git a/doc/source/configuration/reference/vgpu.rst b/doc/source/configuration/reference/vgpu.rst
new file mode 100644
index 000000000..693c89d21
--- /dev/null
+++ b/doc/source/configuration/reference/vgpu.rst
@@ -0,0 +1,310 @@
+============
+Virtual GPUs
+============
+
+Kayobe contains playbooks to configure virtualised GPUs on supported NVIDIA
+hardware. This allows you to statically create mdev devices that can be used
+by Nova to present a virtualised GPU to guest VMs. Known working GPUs are:
+
+- NVIDIA A100
+
+BIOS configuration
+==================
+
+Intel
+-----
+
+* Enable ``VT-x`` in the BIOS for virtualisation support.
+* Enable ``VT-d`` in the BIOS for IOMMU support.
+
+AMD
+---
+
+* Enable ``AMD-V`` in the BIOS for virtualisation support.
+* Enable ``AMD-Vi`` in the BIOS for IOMMU support.
+
+Example: Dell
+-------------
+
+Enabling SR-IOV with ``racadm``:
+
+.. code:: shell
+
+   /opt/dell/srvadmin/bin/idracadm7 set BIOS.IntegratedDevices.SriovGlobalEnable Enabled
+   /opt/dell/srvadmin/bin/idracadm7 jobqueue create BIOS.Setup.1-1
+
+Enabling CPU virtualisation with ``racadm``:
+
+.. code:: shell
+
+   /opt/dell/srvadmin/bin/idracadm7 set BIOS.ProcSettings.ProcVirtualization Enabled
+   /opt/dell/srvadmin/bin/idracadm7 jobqueue create BIOS.Setup.1-1
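+
+After the BIOS settings have been applied and the kernel IOMMU options have
+been configured by the playbooks described below, you can sanity-check the
+result from the hypervisor. A minimal, illustrative check (the exact kernel
+messages vary by platform and kernel version):
+
+.. code:: shell
+
+   # Confirm the CPU virtualisation extensions are visible to the kernel.
+   grep -c -E 'vmx|svm' /proc/cpuinfo
+
+   # Confirm the IOMMU was initialised (Intel logs DMAR messages, AMD AMD-Vi).
+   sudo dmesg | grep -i -e DMAR -e IOMMU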
+
+Obtain driver from NVIDIA licensing portal
+==========================================
+
+Download the NVIDIA GRID driver from the NVIDIA Licensing Portal (this
+requires a login).
+
+.. _Configuration:
+
+Configuration
+=============
+
+.. seealso::
+
+   For further context, please see:
+
+   * :ref:`configuration-kayobe`
+
+Add hosts with supported GPUs to the ``compute-vgpu`` group. If using bifrost
+and the ``kayobe overcloud inventory discover`` mechanism, this can be
+achieved with:
+
+.. code-block:: yaml
+   :caption: ``$KAYOBE_CONFIG_PATH/overcloud.yml``
+
+   overcloud_group_hosts_map:
+     compute-vgpu:
+       - "computegpu000"
+
+Configure the location of the NVIDIA driver:
+
+.. code-block:: yaml
+   :caption: ``$KAYOBE_CONFIG_PATH/vgpu.yml``
+
+   ---
+   vgpu_driver_url: "https://example.com/NVIDIA-GRID-Linux-KVM-525.105.14-525.105.17-528.89.zip"
+
+If you do not know which vGPU types your card supports, you can determine them
+by following :ref:`VGPU_Types`.
+
+You can then define ``group_vars`` describing the vGPU configuration:
+
+.. code-block:: yaml
+   :caption: ``$KAYOBE_CONFIG_PATH/inventory/group_vars/compute-vgpu/vgpu``
+
+   # nvidia-692 GRID A100D-4C
+   # nvidia-693 GRID A100D-8C
+   # nvidia-694 GRID A100D-10C
+   # nvidia-695 GRID A100D-16C
+   # nvidia-696 GRID A100D-20C
+   # nvidia-697 GRID A100D-40C
+   # nvidia-698 GRID A100D-80C
+   # nvidia-699 GRID A100D-1-10C
+   # nvidia-700 GRID A100D-2-20C
+   # nvidia-701 GRID A100D-3-40C
+   # nvidia-702 GRID A100D-4-40C
+   # nvidia-703 GRID A100D-7-80C
+   # nvidia-707 GRID A100D-1-10CME
+   vgpu_definitions:
+     # Configuring a MIG backed VGPU.
+     - pci_address: "0000:17:00.0"
+       mig_devices:
+         # This section describes how to partition the card using MIG. The key
+         # in the dictionary represents a MIG profile supported by your card
+         # and the value is the number of MIG devices of that type that you
+         # want to create. The vGPUs are then created on top of these MIG
+         # devices. The available profiles can be found in the NVIDIA
+         # documentation:
+         # https://docs.nvidia.com/grid/15.0/grid-vgpu-user-guide/index.html#virtual-gpu-types-grid-reference
+         "1g.10gb": 1
+         "2g.20gb": 3
+       virtual_functions:
+         # The mdev type is the NVIDIA identifier for a particular vGPU. When
+         # using MIG backed vGPUs these must match up with your MIG devices.
+         # See the NVIDIA vGPU types section in this document. For example,
+         # the three nvidia-700 (GRID A100D-2-20C) vGPUs below consume the
+         # three 2g.20gb MIG devices defined above, and the nvidia-699
+         # (GRID A100D-1-10C) vGPU consumes the single 1g.10gb MIG device.
+         - mdev_type: nvidia-700
+           index: 0
+         - mdev_type: nvidia-700
+           index: 1
+         - mdev_type: nvidia-700
+           index: 2
+         - mdev_type: nvidia-699
+           index: 3
+     # Configuring a card in a time-sliced configuration (non-MIG backed).
+     - pci_address: "0000:65:00.0"
+       virtual_functions:
+         - mdev_type: nvidia-697
+           index: 0
+         - mdev_type: nvidia-697
+           index: 1
+
+To apply this configuration, use:
+
+.. code:: shell
+
+   (kayobe) $ kayobe overcloud host configure -t vgpu
+
+.. _VGPU_Types:
+
+NVIDIA vGPU types
+=================
+
+.. seealso::
+
+   For further context, please see:
+
+   * `NVIDIA docs on vGPU types
+     <https://docs.nvidia.com/grid/15.0/grid-vgpu-user-guide/index.html#virtual-gpu-types-grid-reference>`__
+   * :ref:`configuration-kolla-ansible`
+
+The NVIDIA vGPU drivers must be installed before you can query for the
+available vGPU types. This can be achieved by not defining any virtual
+functions in the vGPU definition:
+
+.. code-block:: yaml
+   :caption: ``$KAYOBE_CONFIG_PATH/inventory/group_vars/compute-vgpu/vgpu``
+
+   vgpu_definitions:
+     - pci_address: "0000:17:00.0"
+       virtual_functions: []
+
+See :ref:`Configuration`. You can then use ``mdevctl`` to query for the
+available vGPU types:
+
+.. code:: shell
+
+   mdevctl types
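+
+The output lists each parent device and the mdev types it supports. A
+truncated, illustrative example from an A100 (the type names and instance
+counts will vary with the card, partitioning and driver version):
+
+.. code:: shell
+
+   $ mdevctl types
+   0000:17:00.4
+     nvidia-699
+       Available instances: 1
+       Device API: vfio-pci
+       Name: GRID A100D-1-10C
+     nvidia-700
+       Available instances: 1
+       Device API: vfio-pci
+       Name: GRID A100D-2-20C
+   ...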
+
+.. _VGPU_Kolla_Configuration:
+
+Kolla Ansible configuration
+===========================
+
+.. seealso::
+
+   For further context, please see:
+
+   * :nova-doc:`Attaching virtual GPU devices to guests in the Nova
+     documentation <admin/virtual-gpu.html>`
+   * :ref:`configuration-kolla-ansible`
+
+To use the mdev devices that were created, modify ``nova.conf`` to add a list
+of mdev devices that can be passed through to guests:
+
+.. code-block:: ini
+   :caption: ``$KAYOBE_CONFIG_PATH/kolla/config/nova/nova-compute.conf``
+
+   {% raw %}
+   {% if inventory_hostname in groups['compute-vgpu'] %}
+   [devices]
+   enabled_mdev_types = nvidia-700, nvidia-699, nvidia-697
+
+   [mdev_nvidia-700]
+   device_addresses = 0000:17:00.4,0000:17:00.5,0000:17:00.6
+   mdev_class = CUSTOM_NVIDIA_700
+
+   [mdev_nvidia-699]
+   device_addresses = 0000:17:00.7
+   mdev_class = CUSTOM_NVIDIA_699
+
+   [mdev_nvidia-697]
+   device_addresses = 0000:65:00.4,0000:65:00.5
+   mdev_class = CUSTOM_NVIDIA_697
+   {% endif %}
+   {% endraw %}
+
+You will need to adjust the PCI addresses to match the virtual function
+addresses. These can be obtained by checking the mdevctl configuration after
+applying the :ref:`configuration <Configuration>`:
+
+.. code-block:: shell
+
+   # mdevctl list
+   73269d0f-b2c9-438d-8f28-f9e4bc6c6995 0000:17:00.4 nvidia-700 manual (defined)
+   dc352ef3-efeb-4a5d-a48e-912eb230bc76 0000:17:00.5 nvidia-700 manual (defined)
+   a464fbae-1f89-419a-a7bd-3a79c7b2eef4 0000:17:00.6 nvidia-700 manual (defined)
+   f3b823d3-97c8-4e0a-ae1b-1f102dcb3bce 0000:17:00.7 nvidia-699 manual (defined)
+   330be289-ba3f-4416-8c8a-b46ba7e51284 0000:65:00.4 nvidia-700 manual (defined)
+   1ba5392c-c61f-4f48-8fb1-4c6b2bbb0673 0000:65:00.5 nvidia-700 manual (defined)
+   f6868020-eb3a-49c6-9701-6c93e4e3fa9c 0000:65:00.6 nvidia-700 manual (defined)
+   00501f37-c468-5ba4-8be2-8d653c4604ed 0000:65:00.7 nvidia-699 manual (defined)
+
+The ``mdev_class`` maps to a resource class that you can set in your flavor
+definition. Note that if you only define a single mdev type on a given
+hypervisor, the ``mdev_class`` configuration option is silently ignored and
+the ``VGPU`` resource class is used instead (see `bug 1943934
+<https://bugs.launchpad.net/nova/+bug/1943934>`__).
+
+To apply the configuration to Nova:
+
+.. code:: shell
+
+   (kayobe) $ kayobe overcloud service deploy -kt nova
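+
+Once the containers have restarted, you can optionally check that the custom
+resource classes have been reported to Placement. A sketch of such a check,
+assuming the ``osc-placement`` CLI plugin is installed and using the
+illustrative hostname from above:
+
+.. code:: shell
+
+   # Each enabled mdev type is exposed as a nested resource provider under
+   # the compute node's provider.
+   openstack resource provider list | grep computegpu000
+
+   # Inspect the inventory of one of the providers listed above.
+   openstack resource provider inventory list <provider-uuid>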
+
+OpenStack flavors
+=================
+
+Define some flavors that request the resource class that was configured in
+``nova.conf``. An example definition that can be used with the
+``openstack.cloud.compute_flavor`` Ansible module is shown below:
+
+.. code-block:: yaml
+
+   openstack.cloud.compute_flavor:
+     name: "vgpu.a100.2g.20gb"
+     ram: 65536
+     disk: 30
+     vcpus: 8
+     is_public: false
+     extra_specs:
+       hw:cpu_policy: "dedicated"
+       hw:cpu_thread_policy: "prefer"
+       hw:mem_page_size: "1GB"
+       hw:cpu_sockets: 2
+       hw:numa_nodes: 8
+       hw_rng:allowed: "True"
+       resources:CUSTOM_NVIDIA_700: "1"
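+
+Booting an instance with this flavor requests one ``CUSTOM_NVIDIA_700``
+resource from Placement, so the instance is scheduled to a hypervisor with a
+free nvidia-700 vGPU. A hypothetical example (substitute your own image and
+network names):
+
+.. code:: shell
+
+   openstack server create --flavor vgpu.a100.2g.20gb \
+       --image ubuntu-22.04 --network demo-net vgpu-test-01
+
+Note that the NVIDIA guest driver, and where applicable GRID licensing, must
+still be set up inside the guest image before the vGPU can be used.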
+
+Changing VGPU device types
+==========================
+
+This example converts the second card to an NVIDIA-698 (whole card) vGPU
+type. The hypervisor should be empty so that the mdevs can be freely deleted;
+if this is not the case, you will need to check which mdevs are in use and
+proceed with extreme caution. First, clean up the mdev definitions to make
+room for the new device:
+
+.. code:: shell
+
+   [stack@computegpu000 ~]$ sudo mdevctl list
+   5c630867-a673-5d75-aa31-a499e6c7cb19 0000:21:00.4 nvidia-697 manual (defined)
+   eaa6e018-308e-58e2-b351-aadbcf01f5a8 0000:21:00.5 nvidia-697 manual (defined)
+   72291b01-689b-5b7a-9171-6b3480deabf4 0000:81:00.4 nvidia-697 manual (defined)
+   0a47ffd1-392e-5373-8428-707a4e0ce31a 0000:81:00.5 nvidia-697 manual (defined)
+
+   [stack@computegpu000 ~]$ sudo mdevctl stop --uuid 72291b01-689b-5b7a-9171-6b3480deabf4
+   [stack@computegpu000 ~]$ sudo mdevctl stop --uuid 0a47ffd1-392e-5373-8428-707a4e0ce31a
+
+   [stack@computegpu000 ~]$ sudo mdevctl undefine --uuid 0a47ffd1-392e-5373-8428-707a4e0ce31a
+   [stack@computegpu000 ~]$ sudo mdevctl undefine --uuid 72291b01-689b-5b7a-9171-6b3480deabf4
+
+   [stack@computegpu000 ~]$ sudo mdevctl list --defined
+   5c630867-a673-5d75-aa31-a499e6c7cb19 0000:21:00.4 nvidia-697 manual (active)
+   eaa6e018-308e-58e2-b351-aadbcf01f5a8 0000:21:00.5 nvidia-697 manual (active)
+
+   # We can re-use the first virtual function on the second card.
+
+Second, remove the systemd units that start the removed mdev devices:
+
+.. code:: shell
+
+   [stack@computegpu000 ~]$ sudo rm /etc/systemd/system/multi-user.target.wants/nvidia-mdev@0a47ffd1-392e-5373-8428-707a4e0ce31a.service
+   [stack@computegpu000 ~]$ sudo rm /etc/systemd/system/multi-user.target.wants/nvidia-mdev@72291b01-689b-5b7a-9171-6b3480deabf4.service
+
+Adapt your :ref:`Kayobe <Configuration>` and :ref:`Kolla Ansible
+<VGPU_Kolla_Configuration>` configuration to match the desired state and then
+re-run host configure:
+
+.. code:: shell
+
+   (kayobe) $ kayobe overcloud host configure --tags vgpu --limit computegpu000
+
+Check the result:
+
+.. code:: shell
+
+   [stack@computegpu000 ~]$ mdevctl list
+   5c630867-a673-5d75-aa31-a499e6c7cb19 0000:21:00.4 nvidia-697 manual
+   eaa6e018-308e-58e2-b351-aadbcf01f5a8 0000:21:00.5 nvidia-697 manual
+   72291b01-689b-5b7a-9171-6b3480deabf4 0000:81:00.4 nvidia-698 manual
+
+Reconfigure Nova to match the change:
+
+.. code:: shell
+
+   (kayobe) $ kayobe overcloud service reconfigure -kt nova --kolla-limit computegpu000 --skip-prechecks
diff --git a/etc/kayobe/inventory/groups b/etc/kayobe/inventory/groups
index 16619fed8..645a550b5 100644
--- a/etc/kayobe/inventory/groups
+++ b/etc/kayobe/inventory/groups
@@ -41,8 +41,11 @@ controllers
 [storage]
 # Empty group to provide declaration of storage group.
 
-[compute]
-# Empty group to provide declaration of compute group.
+[compute-vgpu]
+# Empty group to provide declaration of compute-vgpu group.
+
+[compute:children]
+compute-vgpu
 
 [overcloud:children]
 controllers
@@ -51,6 +54,14 @@ monitoring
 storage
 compute
 
+###############################################################################
+# Feature control groups
+[vgpu:children]
+compute-vgpu
+
+[iommu:children]
+vgpu
+
 ###############################################################################
 # Service groups.
diff --git a/etc/kayobe/vgpu.yml b/etc/kayobe/vgpu.yml
new file mode 100644
index 000000000..c304f123c
--- /dev/null
+++ b/etc/kayobe/vgpu.yml
@@ -0,0 +1,22 @@
+---
+################################################################################
+# VGPU configuration.
+
+# URL pointing to location of GRID driver. Examples are:
+# "file://path/on/ansible/control/host"
+# "http://webserver/NVIDIA-GRID-Linux-KVM-525.105.14-525.105.17-528.89.zip"
+# Default is: None.
+#vgpu_driver_url:
+
+# Flag to control whether the vGPU playbook should automatically reboot the
+# hypervisor. Note: this is necessary for the driver to be loaded correctly.
+# Caution should be used when changing this option. Default is true.
+#vgpu_do_reboot:
+
+# Time to wait when rebooting the host before failing.
+# Default is 600 (seconds).
+#vgpu_reboot_timeout:
+
+###############################################################################
+# Dummy variable to allow Ansible to accept this file.
+workaround_ansible_issue_8743: yes
diff --git a/releasenotes/notes/adds-nvidia-vgpu-support-b4c6f08a06c373c6.yaml b/releasenotes/notes/adds-nvidia-vgpu-support-b4c6f08a06c373c6.yaml
new file mode 100644
index 000000000..b67cccf1d
--- /dev/null
+++ b/releasenotes/notes/adds-nvidia-vgpu-support-b4c6f08a06c373c6.yaml
@@ -0,0 +1,7 @@
+---
+features:
+  - |
+    Adds initial support for configuring NVIDIA vGPUs. For more information,
+    please see the `new documentation
+    <https://docs.openstack.org/kayobe/latest/configuration/reference/vgpu.html>`_
+    for this feature.
diff --git a/requirements.yml b/requirements.yml
index 6a05407f4..1e9807561 100644
--- a/requirements.yml
+++ b/requirements.yml
@@ -7,6 +7,8 @@ collections:
     version: 1.1.1
   - name: openstack.cloud
     version: '<3'
+  - name: stackhpc.linux
+    version: 1.1.0
 
 roles:
   - src: ahuffman.resolv