Merge "XenAPI: get vGPU stats from hypervisor"

This commit is contained in:
Zuul 2017-12-04 18:26:37 +00:00 committed by Gerrit Code Review
commit 2c4a1a390a
5 changed files with 322 additions and 2 deletions

View File

@ -67,7 +67,15 @@ class XenAPIDriverTestCase(stubs.XenAPITestBaseNoDB):
},
'vcpus_used': 10,
'pci_passthrough_devices': '',
'host_other-config': {'iscsi_iqn': 'someiqn'}}
'host_other-config': {'iscsi_iqn': 'someiqn'},
'vgpu_stats': {
'c8328467-badf-43d8-8e28-0e096b0f88b1':
{'uuid': '6444c6ee-3a49-42f5-bebb-606b52175e67',
'total': 7,
'max_heads': 1,
'type_name': 'Intel GVT-g',
},
}}
def test_available_resource(self):
driver = self._get_driver()

View File

@ -0,0 +1,202 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import mock
from nova import test
from nova.virt.xenapi import host
class VGPUTestCase(test.NoDBTestCase):
    """Unit tests for the vGPU stats helpers of XenAPI's HostState.

    ``HostState.update_status`` is patched out in every test so that
    building a ``HostState`` around a mocked session does not trigger a
    real stats refresh. NOTE(review): the tests rely on the constructor
    calling ``update_status`` exactly once - confirm against
    ``HostState.__init__``.
    """

    @mock.patch.object(host.HostState, 'update_status',
                       return_value='fake_stats_1')
    @mock.patch.object(host.HostState, '_get_vgpu_stats_in_group')
    def test_get_vgpu_stats_empty_cfg(self, mock_get, mock_update):
        # No vGPU type configured: XenAPI must not be queried at all and
        # an empty dict is reported.
        self.flags(enabled_vgpu_types=[], group='devices')
        session = mock.Mock()

        host_obj = host.HostState(session)
        stats = host_obj._get_vgpu_stats()

        session.call_xenapi.assert_not_called()
        mock_get.assert_not_called()
        self.assertEqual({}, stats)

    @mock.patch.object(host.HostState, 'update_status',
                       return_value='fake_stats_1')
    @mock.patch.object(host.HostState, '_get_vgpu_stats_in_group')
    def test_get_vgpu_stats_single_type(self, mock_get, mock_update):
        # A single vGPU type is configured and the host has multiple GPU
        # groups.
        self.flags(enabled_vgpu_types=['type_name_1'], group='devices')
        session = mock.Mock()
        session.call_xenapi.side_effect = [
            ['grp_ref1', 'grp_ref2'],  # GPU_group.get_all
            'uuid_1',  # GPU_group.get_uuid
            'uuid_2',  # GPU_group.get_uuid
        ]
        # Let it return None for the 2nd GPU group for the case
        # that it doesn't have the specified vGPU type enabled.
        mock_get.side_effect = ['fake_stats_1', None]

        host_obj = host.HostState(session)
        stats = host_obj._get_vgpu_stats()

        self.assertEqual(3, session.call_xenapi.call_count)
        self.assertEqual(1, mock_update.call_count)
        self.assertEqual(2, mock_get.call_count)
        # Every GPU group is inspected with the configured vGPU type.
        mock_get.assert_has_calls([
            mock.call('grp_ref1', ['type_name_1']),
            mock.call('grp_ref2', ['type_name_1']),
        ])
        # The group without stats is left out of the result.
        self.assertEqual({'uuid_1': 'fake_stats_1'}, stats)

    @mock.patch.object(host.HostState, 'update_status',
                       return_value='fake_stats_1')
    @mock.patch.object(host.HostState, '_get_vgpu_stats_in_group')
    def test_get_vgpu_stats_multi_types(self, mock_get, mock_update):
        # When multiple vGPU types are configured, only the first one is
        # used.
        self.flags(enabled_vgpu_types=['type_name_1', 'type_name_2'],
                   group='devices')
        session = mock.Mock()
        session.call_xenapi.side_effect = [
            ['grp_ref1'],  # GPU_group.get_all
            'uuid_1',  # GPU_group.get_uuid
        ]
        mock_get.side_effect = ['fake_stats_1']

        host_obj = host.HostState(session)
        stats = host_obj._get_vgpu_stats()

        self.assertEqual(2, session.call_xenapi.call_count)
        self.assertEqual(1, mock_update.call_count)
        self.assertEqual({'uuid_1': 'fake_stats_1'}, stats)
        # Called exactly once, with the first vGPU type: 'type_name_1'.
        mock_get.assert_called_once_with('grp_ref1', ['type_name_1'])

    @mock.patch.object(host.HostState, 'update_status',
                       return_value='fake_stats_1')
    @mock.patch.object(host.HostState, '_get_total_vgpu_in_grp',
                       return_value=7)
    def test_get_vgpu_stats_in_group(self, mock_total, mock_update):
        # Test it will return the vGPU stat for the enabled vGPU type.
        enabled_vgpu_types = ['type_name_2']
        session = mock.Mock()
        session.call_xenapi.side_effect = [
            ['type_ref_1', 'type_ref_2'],  # GPU_group.get_enabled_VGPU_types
            'type_name_1',  # VGPU_type.get_model_name
            'type_name_2',  # VGPU_type.get_model_name
            'type_uuid_2',  # VGPU_type.get_uuid
            '4',  # VGPU_type.get_max_heads
        ]

        host_obj = host.HostState(session)
        stats = host_obj._get_vgpu_stats_in_group('grp_ref',
                                                  enabled_vgpu_types)

        expect_stats = {
            'uuid': 'type_uuid_2',
            'type_name': 'type_name_2',
            'max_heads': 4,
            'total': 7,
        }
        self.assertEqual(5, session.call_xenapi.call_count)
        # It should get_uuid for the vGPU type passed via *enabled_vgpu_types*
        # (the arg for get_uuid should be 'type_ref_2').
        get_uuid_call = [mock.call('VGPU_type.get_uuid', 'type_ref_2')]
        session.call_xenapi.assert_has_calls(get_uuid_call)
        # The capacity must be computed for that same group and type ref.
        mock_total.assert_called_once_with('grp_ref', 'type_ref_2')
        self.assertEqual(expect_stats, stats)

    @mock.patch.object(host.HostState, 'update_status')
    @mock.patch.object(host.HostState, '_get_total_vgpu_in_grp',
                       return_value=7)
    def test_get_vgpu_stats_in_group_multiple(self, mock_total, mock_update):
        # Test when multiple vGPU types are enabled in the same group.
        # It should only return the first vGPU type's stats.
        enabled_vgpu_types = ['type_name_1', 'type_name_2']
        session = mock.Mock()
        session.call_xenapi.side_effect = [
            ['type_ref_1', 'type_ref_2'],  # GPU_group.get_enabled_VGPU_types
            'type_name_1',  # VGPU_type.get_model_name
            'type_name_2',  # VGPU_type.get_model_name
            'type_uuid_1',  # VGPU_type.get_uuid
            '4',  # VGPU_type.get_max_heads
        ]

        host_obj = host.HostState(session)
        stats = host_obj._get_vgpu_stats_in_group('grp_ref',
                                                  enabled_vgpu_types)

        expect_stats = {
            'uuid': 'type_uuid_1',
            'type_name': 'type_name_1',
            'max_heads': 4,
            'total': 7,
        }
        self.assertEqual(5, session.call_xenapi.call_count)
        # It should call get_uuid for the first vGPU type (the arg for
        # get_uuid should be 'type_ref_1').
        get_uuid_call = [mock.call('VGPU_type.get_uuid', 'type_ref_1')]
        session.call_xenapi.assert_has_calls(get_uuid_call)
        # The capacity must be computed for that same group and type ref.
        mock_total.assert_called_once_with('grp_ref', 'type_ref_1')
        self.assertEqual(expect_stats, stats)

    @mock.patch.object(host.HostState, 'update_status')
    @mock.patch.object(host.HostState, '_get_total_vgpu_in_grp',
                       return_value=7)
    def test_get_vgpu_stats_in_group_cfg_not_in_grp(self, mock_total,
                                                    mock_update):
        # Test when the configured vGPU type is not one of the types
        # enabled in the GPU group. It should return None.
        enabled_vgpu_types = ['bad_type_name']
        session = mock.Mock()
        session.call_xenapi.side_effect = [
            ['type_ref_1', 'type_ref_2'],  # GPU_group.get_enabled_VGPU_types
            'type_name_1',  # VGPU_type.get_model_name
            'type_name_2',  # VGPU_type.get_model_name
        ]

        host_obj = host.HostState(session)
        stats = host_obj._get_vgpu_stats_in_group('grp_ref',
                                                  enabled_vgpu_types)

        self.assertEqual(3, session.call_xenapi.call_count)
        mock_total.assert_not_called()
        self.assertIsNone(stats)

    @mock.patch.object(host.HostState, 'update_status')
    def test_get_total_vgpu_in_grp(self, mock_update):
        session = mock.Mock()
        # The fake PGPU records returned from call_xenapi's string function:
        # "PGPU.get_all_records_where".
        pgpu_records = {
            'pgpu_ref1': {
                'enabled_VGPU_types': ['type_ref1', 'type_ref2'],
                'supported_VGPU_max_capacities': {
                    'type_ref1': '1',
                    'type_ref2': '3',
                }
            },
            'pgpu_ref2': {
                'enabled_VGPU_types': ['type_ref1', 'type_ref2'],
                'supported_VGPU_max_capacities': {
                    'type_ref1': '1',
                    'type_ref2': '3',
                }
            }
        }
        session.call_xenapi.return_value = pgpu_records

        host_obj = host.HostState(session)
        total = host_obj._get_total_vgpu_in_grp('grp_ref', 'type_ref1')

        session.call_xenapi.assert_called_once_with(
            'PGPU.get_all_records_where', 'field "GPU_group" = "grp_ref"')
        # The total amount of vGPUs is equal to the sum of the available
        # vGPUs of 'type_ref1' in all PGPUs.
        self.assertEqual(2, total)

View File

@ -2230,12 +2230,14 @@ class XenAPIHostTestCase(stubs.XenAPITestBase):
@mock.patch.object(host.HostState, 'get_disk_used')
@mock.patch.object(host.HostState, '_get_passthrough_devices')
@mock.patch.object(host.HostState, '_get_vgpu_stats')
@mock.patch.object(jsonutils, 'loads')
@mock.patch.object(vm_utils, 'list_vms')
@mock.patch.object(vm_utils, 'scan_default_sr')
@mock.patch.object(host_management, 'get_host_data')
def test_update_stats_caches_hostname(self, mock_host_data, mock_scan_sr,
mock_list_vms, mock_loads,
mock_vgpus_stats,
mock_devices, mock_dis_used):
data = {'disk_total': 0,
'disk_used': 0,
@ -2266,10 +2268,12 @@ class XenAPIHostTestCase(stubs.XenAPITestBase):
self.assertEqual(2, mock_host_data.call_count)
self.assertEqual(2, mock_scan_sr.call_count)
self.assertEqual(2, mock_devices.call_count)
self.assertEqual(2, mock_vgpus_stats.call_count)
mock_loads.assert_called_with(data)
mock_host_data.assert_called_with(self.conn._session)
mock_scan_sr.assert_called_with(self.conn._session)
mock_devices.assert_called_with()
mock_vgpus_stats.assert_called_with()
@mock.patch.object(host.HostState, 'update_status')

View File

@ -68,7 +68,8 @@ from nova.i18n import _
_CLASSES = ['host', 'network', 'session', 'pool', 'SR', 'VBD',
'PBD', 'VDI', 'VIF', 'PIF', 'VM', 'VLAN', 'task']
'PBD', 'VDI', 'VIF', 'PIF', 'VM', 'VLAN', 'task',
'GPU_group', 'PGPU', 'VGPU_type']
_after_create_functions = {}
_destroy_functions = {}

View File

@ -220,6 +220,110 @@ class HostState(object):
return passthrough_devices
def _get_vgpu_stats(self):
    """Query XenAPI for the vGPU stats of every GPU group on the host.

    The result is a dict keyed by GPU group uuid:
        dict(grp_uuid_1=dict_vgpu_stats_in_grp_1,
             grp_uuid_2=dict_vgpu_stats_in_grp_2,
             ...,
             grp_uuid_n=dict_vgpu_stats_in_grp_n)
    Each `dict_vgpu_stats_in_grp_x` is the dict describing the vGPU
    stats in GPU group x, exactly as returned by
    _get_vgpu_stats_in_group(). Groups for which that function returns
    nothing are left out. An empty dict is returned, without calling
    XenAPI, when no vGPU type is enabled in the configuration.
    """
    configured_types = CONF.devices.enabled_vgpu_types
    if not configured_types:
        return {}

    # NOTE(jianghuaw): If there are multiple vGPU types enabled in
    # the config option, we only choose the first one so that
    # we support only one vGPU type per compute node at the moment.
    # Once we switch to use the nested resource providers, we will
    # remove these lines to allow multiple vGPU types within multiple
    # GPU groups (each group has a different vGPU type enabled).
    if len(configured_types) > 1:
        LOG.warning('XenAPI only supports one GPU type per compute node,'
                    ' only first type will be used.')
    first_type_only = configured_types[:1]

    session = self._session
    stats_by_group = {}
    for grp_ref in session.call_xenapi('GPU_group.get_all'):
        grp_uuid = session.call_xenapi('GPU_group.get_uuid', grp_ref)
        grp_stat = self._get_vgpu_stats_in_group(grp_ref, first_type_only)
        if not grp_stat:
            # The configured vGPU type is not enabled in this group.
            continue
        stats_by_group[grp_uuid] = grp_stat

    LOG.debug("Returning vGPU stats: %s", stats_by_group)
    return stats_by_group
def _get_vgpu_stats_in_group(self, grp_ref, vgpu_types):
"""Get stats for the specified vGPU types in a GPU group.
NOTE(Jianghuaw): In XenAPI, a GPU group is the minimal unit
from where to create a vGPU for an instance. So here, we
report vGPU resources for a particular GPU group. When we use
nested resource providers to represent the vGPU resources,
each GPU group will be a child resource provider under the
compute node.
The return value is a dict. For example:
{'uuid': '6444c6ee-3a49-42f5-bebb-606b52175e67',
'total': 7,
'max_heads': '1',
'type_name': 'Intel GVT-g',
}
"""
type_refs_in_grp = self._session.call_xenapi(
'GPU_group.get_enabled_VGPU_types', grp_ref)
type_names_in_grp = {self._session.call_xenapi(
'VGPU_type.get_model_name',
type_ref): type_ref
for type_ref in type_refs_in_grp}
# Get the vGPU types enabled both in this GPU group and in the
# nova conf.
enabled_types = set(vgpu_types) & set(type_names_in_grp)
if not enabled_types:
return
stat = {}
# Get the sorted enabled types, so that we can always choose the same
# type when there are multiple enabled vGPU types.
sorted_types = sorted(enabled_types)
chosen_type = sorted_types[0]
if len(sorted_types) > 1:
LOG.warning('XenAPI only supports one vGPU type per GPU group,'
' but enabled multiple vGPU types: %(available)s.'
' Choosing the first one: %(chosen)s.',
dict(available=sorted_types,
chosen=chosen_type))
type_ref = type_names_in_grp[chosen_type]
type_uuid = self._session.call_xenapi('VGPU_type.get_uuid', type_ref)
stat['uuid'] = type_uuid
stat['type_name'] = chosen_type
stat['max_heads'] = int(self._session.call_xenapi(
'VGPU_type.get_max_heads', type_ref))
stat['total'] = self._get_total_vgpu_in_grp(grp_ref, type_ref)
return stat
def _get_total_vgpu_in_grp(self, grp_ref, type_ref):
"""Get the total capacity of vGPUs in the group."""
pgpu_recs = self._session.call_xenapi(
'PGPU.get_all_records_where', 'field "GPU_group" = "%s"' % grp_ref)
total = 0
for pgpu_ref in pgpu_recs:
pgpu_rec = pgpu_recs[pgpu_ref]
if type_ref in pgpu_rec['enabled_VGPU_types']:
cap = pgpu_rec['supported_VGPU_max_capacities'][type_ref]
total += int(cap)
return total
def get_host_stats(self, refresh=False):
"""Return the current state of the host. If 'refresh' is
True, run the update first.
@ -309,6 +413,7 @@ class HostState(object):
vcpus_used = vcpus_used + int(vm_rec['VCPUs_max'])
data['vcpus_used'] = vcpus_used
data['pci_passthrough_devices'] = self._get_passthrough_devices()
data['vgpu_stats'] = self._get_vgpu_stats()
self._stats = data