Cyborg shelve/unshelve support

This change extends the conductor manager to append the cyborg
resource request to the request spec when performing an unshelve.

On shelve offload, the instance's ARQ binding info is deleted to
free up the bound ARQs in the Cyborg service. During unshelve, the
ARQs are passed to spawn.

This change extends the ``shelve_instance``, ``shelve_offload_instance``
and ``unshelve_instance`` rpcapi functions to carry the accelerator
request (ARQ) UUIDs.
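
For reference, a minimal sketch of the client-side RPC negotiation this
change introduces (``negotiate_shelve`` is an invented name used only for
illustration; the real logic lives in the ComputeAPI client and relies on
oslo.messaging's can_send_version()): when the target compute cannot speak
RPC 5.13 yet, ``accel_uuids`` is dropped, and a shelve that actually
carries accelerators is rejected instead of silently losing the ARQs.

    class ForbiddenWithAccelerators(Exception):
        """Stand-in for nova.exception.ForbiddenWithAccelerators."""


    def negotiate_shelve(can_send_5_13, accel_uuids, msg_kwargs):
        """Return (rpc_version, kwargs) for the shelve_instance cast."""
        if can_send_5_13:
            return '5.13', dict(msg_kwargs, accel_uuids=accel_uuids)
        if accel_uuids:
            # An old compute cannot receive ARQs; fail rather than leak them.
            raise ForbiddenWithAccelerators()
        return '5.0', dict(msg_kwargs)


    # Old compute, no accelerators: fall back to 5.0 and drop accel_uuids.
    version, kwargs = negotiate_shelve(False, [], {'image_id': 'img-1'})
    assert version == '5.0' and 'accel_uuids' not in kwargs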

Co-Authored-By: Wenping Song <songwenping@inspur.com>

Implements: blueprint cyborg-shelve-and-unshelve
Change-Id: I258df4d77f6d86df1d867a8fe27360731c21d237
zhangbailin 2020-05-20 18:34:59 +08:00
parent 261416aeb0
commit 7fbd787b1b
15 changed files with 827 additions and 89 deletions

View File

@ -43,12 +43,12 @@ The lists of supported and unsupported operations are as below:
* Rescue and unrescue.
* Rebuild.
* Evacuate.
* Shelve and unshelve.
* Unsupported operations
* Resize.
* Suspend and resume.
* Shelve and unshelve.
* Cold migration.
* Live migration.
@ -56,6 +56,10 @@ The lists of supported and unsupported operations are as below:
Added support for rebuild and evacuate operations.
.. versionchanged:: 23.0.0(Wallaby)
Added support for shelve and unshelve operations.
Some operations, such as lock and unlock, work as they are effectively
no-ops for accelerators.

View File

@ -37,7 +37,7 @@ class ShelveController(wsgi.Controller):
self.network_api = neutron.API()
@wsgi.response(202)
@wsgi.expected_errors((404, 409))
@wsgi.expected_errors((404, 403, 409))
@wsgi.action('shelve')
def _shelve(self, req, id, body):
"""Move an instance into shelved mode."""
@ -55,6 +55,8 @@ class ShelveController(wsgi.Controller):
exception.UnexpectedTaskStateError,
) as e:
raise exc.HTTPConflict(explanation=e.format_message())
except exception.ForbiddenWithAccelerators as e:
raise exc.HTTPForbidden(explanation=e.format_message())
except exception.InstanceInvalidState as state_error:
common.raise_http_conflict_for_instance_invalid_state(state_error,
'shelve', id)

View File

@ -4158,7 +4158,7 @@ class API(base.Base):
return allow_same_host
@reject_vtpm_instances(instance_actions.SHELVE)
@block_accelerators()
@block_accelerators(until_service=54)
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.PAUSED, vm_states.SUSPENDED])
@ -4181,16 +4181,23 @@ class API(base.Base):
self._record_action_start(context, instance, instance_actions.SHELVE)
accel_uuids = []
if instance.flavor.extra_specs.get('accel:device_profile'):
cyclient = cyborg.get_client(context)
accel_uuids = cyclient.get_arq_uuids_for_instance(instance)
if not compute_utils.is_volume_backed_instance(context, instance):
name = '%s-shelved' % instance.display_name
image_meta = compute_utils.create_image(
context, instance, name, 'snapshot', self.image_api)
image_id = image_meta['id']
self.compute_rpcapi.shelve_instance(context, instance=instance,
image_id=image_id, clean_shutdown=clean_shutdown)
image_id=image_id, clean_shutdown=clean_shutdown,
accel_uuids=accel_uuids)
else:
self.compute_rpcapi.shelve_offload_instance(context,
instance=instance, clean_shutdown=clean_shutdown)
self.compute_rpcapi.shelve_offload_instance(
context, instance=instance, clean_shutdown=clean_shutdown,
accel_uuids=accel_uuids)
@check_instance_lock
@check_instance_state(vm_state=[vm_states.SHELVED])
@ -4202,8 +4209,14 @@ class API(base.Base):
self._record_action_start(context, instance,
instance_actions.SHELVE_OFFLOAD)
self.compute_rpcapi.shelve_offload_instance(context, instance=instance,
clean_shutdown=clean_shutdown)
accel_uuids = []
if instance.flavor.extra_specs.get('accel:device_profile'):
cyclient = cyborg.get_client(context)
accel_uuids = cyclient.get_arq_uuids_for_instance(instance)
self.compute_rpcapi.shelve_offload_instance(
context, instance=instance,
clean_shutdown=clean_shutdown, accel_uuids=accel_uuids)
def _validate_unshelve_az(self, context, instance, availability_zone):
"""Verify the specified availability_zone during unshelve.

View File

@ -528,7 +528,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='5.12')
target = messaging.Target(version='5.13')
def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
@ -6326,7 +6326,7 @@ class ComputeManager(manager.Manager):
@wrap_instance_event(prefix='compute')
@wrap_instance_fault
def shelve_instance(self, context, instance, image_id,
clean_shutdown):
clean_shutdown, accel_uuids=None):
"""Shelve an instance.
This should be used when you want to take a snapshot of the instance.
@ -6337,15 +6337,17 @@ class ComputeManager(manager.Manager):
:param instance: an Instance object
:param image_id: an image id to snapshot to.
:param clean_shutdown: give the GuestOS a chance to stop
:param accel_uuids: the accelerator UUIDs for the instance
"""
@utils.synchronized(instance.uuid)
def do_shelve_instance():
self._shelve_instance(context, instance, image_id, clean_shutdown)
self._shelve_instance(context, instance, image_id, clean_shutdown,
accel_uuids)
do_shelve_instance()
def _shelve_instance(self, context, instance, image_id,
clean_shutdown):
clean_shutdown, accel_uuids=None):
LOG.info('Shelving', instance=instance)
offload = CONF.shelved_offload_time == 0
if offload:
@ -6400,14 +6402,16 @@ class ComputeManager(manager.Manager):
phase=fields.NotificationPhase.END, bdms=bdms)
if offload:
self._shelve_offload_instance(context, instance,
clean_shutdown=False, bdms=bdms)
self._shelve_offload_instance(
context, instance, clean_shutdown=False, bdms=bdms,
accel_uuids=accel_uuids)
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@wrap_instance_fault
def shelve_offload_instance(self, context, instance, clean_shutdown):
def shelve_offload_instance(self, context, instance, clean_shutdown,
accel_uuids=None):
"""Remove a shelved instance from the hypervisor.
This frees up those resources for use by other instances, but may lead
@ -6418,15 +6422,17 @@ class ComputeManager(manager.Manager):
:param context: request context
:param instance: nova.objects.instance.Instance
:param clean_shutdown: give the GuestOS a chance to stop
:param accel_uuids: the accelerator UUIDs for the instance
"""
@utils.synchronized(instance.uuid)
def do_shelve_offload_instance():
self._shelve_offload_instance(context, instance, clean_shutdown)
self._shelve_offload_instance(context, instance, clean_shutdown,
accel_uuids=accel_uuids)
do_shelve_offload_instance()
def _shelve_offload_instance(self, context, instance, clean_shutdown,
bdms=None):
bdms=None, accel_uuids=None):
LOG.info('Shelve offloading', instance=instance)
if bdms is None:
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
@ -6451,6 +6457,12 @@ class ComputeManager(manager.Manager):
# terminate all the connections with the volume server and the host
self._terminate_volume_connections(context, instance, bdms)
# NOTE(brinzhang): Free up the accelerator resource occupied
# in the cyborg service.
if accel_uuids:
cyclient = cyborg.get_client(context)
cyclient.delete_arqs_for_instance(instance.uuid)
# Free up the resource allocations in the placement service.
# This should happen *before* the vm_state is changed to
# SHELVED_OFFLOADED in case client-side code is polling the API to
@ -6490,8 +6502,9 @@ class ComputeManager(manager.Manager):
@reverts_task_state
@wrap_instance_event(prefix='compute')
@wrap_instance_fault
def unshelve_instance(self, context, instance, image,
filter_properties, node, request_spec=None):
def unshelve_instance(
self, context, instance, image, filter_properties, node,
request_spec=None, accel_uuids=None):
"""Unshelve the instance.
:param context: request context
@ -6502,6 +6515,7 @@ class ComputeManager(manager.Manager):
:param node: target compute node
:param request_spec: the RequestSpec object used to schedule the
instance
:param accel_uuids: the accelerator UUIDs for the instance
"""
if filter_properties is None:
filter_properties = {}
@ -6510,7 +6524,7 @@ class ComputeManager(manager.Manager):
def do_unshelve_instance():
self._unshelve_instance(
context, instance, image, filter_properties, node,
request_spec)
request_spec, accel_uuids)
do_unshelve_instance()
def _unshelve_instance_key_scrub(self, instance):
@ -6527,7 +6541,7 @@ class ComputeManager(manager.Manager):
instance.update(keys)
def _unshelve_instance(self, context, instance, image, filter_properties,
node, request_spec):
node, request_spec, accel_uuids):
LOG.info('Unshelving', instance=instance)
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)
@ -6574,6 +6588,19 @@ class ComputeManager(manager.Manager):
provider_mappings=provider_mappings)
network_info = self.network_api.get_instance_nw_info(
context, instance)
accel_info = []
if accel_uuids:
try:
accel_info = self._get_bound_arq_resources(
context, instance, accel_uuids)
except (Exception, eventlet.timeout.Timeout) as exc:
LOG.exception('Failure getting accelerator requests '
'with the exception: %s', exc,
instance=instance)
self._build_resources_cleanup(instance, network_info)
raise
with self.rt.instance_claim(context, instance, node, allocations,
limits):
self.driver.spawn(context, instance, image_meta,
@ -6581,7 +6608,8 @@ class ComputeManager(manager.Manager):
admin_password=None,
allocations=allocations,
network_info=network_info,
block_device_info=block_device_info)
block_device_info=block_device_info,
accel_info=accel_info)
except Exception:
with excutils.save_and_reraise_exception(logger=LOG):
LOG.exception('Instance failed to spawn',

View File

@ -380,6 +380,9 @@ class ComputeAPI(object):
build_and_run_instance()
* 5.12 - Add accel_uuids (accelerator requests) parameter to
rebuild_instance()
* 5.13 - Add accel_uuids (accelerator requests) parameter to
shelve_instance(), shelve_offload_instance() and
unshelve_instance()
'''
VERSION_ALIASES = {
@ -1350,40 +1353,65 @@ class ComputeAPI(object):
cctxt.cast(ctxt, 'restore_instance', instance=instance)
def shelve_instance(self, ctxt, instance, image_id=None,
clean_shutdown=True):
version = '5.0'
cctxt = self.router.client(ctxt).prepare(
clean_shutdown=True, accel_uuids=None):
msg_kwargs = {
'instance': instance,
'image_id': image_id,
'clean_shutdown': clean_shutdown,
'accel_uuids': accel_uuids,
}
client = self.router.client(ctxt)
version = '5.13'
if not client.can_send_version(version):
if accel_uuids:
LOG.error("Shelve with accelerators is not supported as "
"RPC version is too old.")
raise exception.ForbiddenWithAccelerators()
else:
msg_kwargs.pop('accel_uuids')
version = '5.0'
cctxt = client.prepare(
server=_compute_host(None, instance), version=version)
cctxt.cast(ctxt, 'shelve_instance', instance=instance,
image_id=image_id, clean_shutdown=clean_shutdown)
cctxt.cast(ctxt, 'shelve_instance', **msg_kwargs)
def shelve_offload_instance(self, ctxt, instance,
clean_shutdown=True):
version = '5.0'
cctxt = self.router.client(ctxt).prepare(
clean_shutdown=True, accel_uuids=None):
msg_kwargs = {
'instance': instance,
'clean_shutdown': clean_shutdown,
'accel_uuids': accel_uuids,
}
client = self.router.client(ctxt)
version = '5.13'
if not client.can_send_version(version):
msg_kwargs.pop('accel_uuids')
version = '5.0'
cctxt = client.prepare(
server=_compute_host(None, instance), version=version)
cctxt.cast(ctxt, 'shelve_offload_instance', instance=instance,
clean_shutdown=clean_shutdown)
cctxt.cast(ctxt, 'shelve_offload_instance', **msg_kwargs)
def unshelve_instance(self, ctxt, instance, host, request_spec, image=None,
filter_properties=None, node=None):
version = '5.2'
filter_properties=None, node=None, accel_uuids=None):
version = '5.13'
msg_kwargs = {
'instance': instance,
'image': image,
'filter_properties': filter_properties,
'node': node,
'request_spec': request_spec,
'accel_uuids': accel_uuids,
}
client = self.router.client(ctxt)
if not client.can_send_version(version):
msg_kwargs.pop('accel_uuids')
version = '5.2'
if not client.can_send_version(version):
msg_kwargs.pop('request_spec')
version = '5.0'
cctxt = client.prepare(
server=host, version=version)
cctxt = client.prepare(server=host, version=version)
cctxt.cast(ctxt, 'unshelve_instance', **msg_kwargs)
def volume_snapshot_create(self, ctxt, instance, volume_id,

View File

@ -838,7 +838,7 @@ class ComputeTaskManager(base.Base):
try:
accel_uuids = self._create_and_bind_arq_for_instance(
context, instance, host, local_reqspec)
context, instance, host.nodename, local_reqspec)
except Exception as exc:
LOG.exception('Failed to reschedule. Reason: %s', exc)
self._cleanup_when_reschedule_fails(
@ -858,7 +858,7 @@ class ComputeTaskManager(base.Base):
limits=host.limits, host_list=host_list,
accel_uuids=accel_uuids)
def _create_and_bind_arq_for_instance(self, context, instance, host,
def _create_and_bind_arq_for_instance(self, context, instance, hostname,
request_spec):
try:
resource_provider_mapping = (
@ -867,7 +867,7 @@ class ComputeTaskManager(base.Base):
# http://lists.openstack.org/pipermail/openstack-discuss/2019-November/011044.html # noqa
return self._create_and_bind_arqs(
context, instance.uuid, instance.flavor.extra_specs,
host.nodename, resource_provider_mapping)
hostname, resource_provider_mapping)
except exception.AcceleratorRequestBindingFailed as exc:
# If anything failed here we need to cleanup and bail out.
cyclient = cyborg.get_client(context)
@ -963,14 +963,19 @@ class ComputeTaskManager(base.Base):
filter_properties = request_spec.\
to_legacy_filter_properties_dict()
port_res_req = (
external_resources = (
self.network_api.get_requested_resource_for_instance(
context, instance.uuid))
# NOTE(gibi): When cyborg or other module wants to handle
# similar non-nova resources then here we have to collect
# all the external resource requests in a single list and
extra_specs = request_spec.flavor.extra_specs
device_profile = extra_specs.get('accel:device_profile')
external_resources.extend(
cyborg.get_device_profile_request_groups(
context, device_profile) if device_profile else [])
# NOTE(gibi): When other modules want to handle similar
# non-nova resources then here we have to collect all
# the external resource requests in a single list and
# add them to the RequestSpec.
request_spec.requested_resources = port_res_req
request_spec.requested_resources = external_resources
# NOTE(cfriesen): Ensure that we restrict the scheduler to
# the cell specified by the instance mapping.
@ -996,9 +1001,14 @@ class ComputeTaskManager(base.Base):
scheduler_utils.fill_provider_mapping(
request_spec, selection)
# NOTE(brinzhang): For unshelve operation we should
# re-create and bind the arqs for the instance.
accel_uuids = self._create_and_bind_arq_for_instance(
context, instance, node, request_spec)
self.compute_rpcapi.unshelve_instance(
context, instance, host, request_spec, image=image,
filter_properties=filter_properties, node=node)
filter_properties=filter_properties, node=node,
accel_uuids=accel_uuids)
except (exception.NoValidHost,
exception.UnsupportedPolicyException):
instance.task_state = None
@ -1006,7 +1016,11 @@ class ComputeTaskManager(base.Base):
LOG.warning("No valid host found for unshelve instance",
instance=instance)
return
except Exception:
except Exception as exc:
if isinstance(exc, exception.AcceleratorRequestBindingFailed):
cyclient = cyborg.get_client(context)
cyclient.delete_arqs_by_uuid(exc.arqs)
LOG.exception('Failed to unshelve. Reason: %s', exc)
with excutils.save_and_reraise_exception():
instance.task_state = None
instance.save()
@ -1225,12 +1239,24 @@ class ComputeTaskManager(base.Base):
instance.availability_zone = (
availability_zones.get_host_availability_zone(
context, host))
accel_uuids = []
try:
accel_uuids = self._rebuild_cyborg_arq(
context, instance, host, request_spec, evacuate)
except exception.AcceleratorRequestBindingFailed as exc:
cyclient = cyborg.get_client(context)
cyclient.delete_arqs_by_uuid(exc.arqs)
if instance.flavor.extra_specs.get('accel:device_profile'):
cyclient = cyborg.get_client(context)
if evacuate:
# NOTE(brinzhang): For evacuate operation we should
# delete the bound arqs, then re-create and bind the
# arqs for the instance.
cyclient.delete_arqs_for_instance(instance.uuid)
accel_uuids = self._create_and_bind_arq_for_instance(
context, instance, node, request_spec)
else:
accel_uuids = cyclient.get_arq_uuids_for_instance(
instance)
except Exception as exc:
if isinstance(exc, exception.AcceleratorRequestBindingFailed):
cyclient = cyborg.get_client(context)
cyclient.delete_arqs_by_uuid(exc.arqs)
LOG.exception('Failed to rebuild. Reason: %s', exc)
raise exc
@ -1253,22 +1279,6 @@ class ComputeTaskManager(base.Base):
request_spec=request_spec,
accel_uuids=accel_uuids)
def _rebuild_cyborg_arq(
self, context, instance, host, request_spec, evacuate):
dp_name = instance.flavor.extra_specs.get('accel:device_profile')
if not dp_name:
return []
cyclient = cyborg.get_client(context)
if not evacuate:
return cyclient.get_arq_uuids_for_instance(instance)
cyclient.delete_arqs_for_instance(instance.uuid)
resource_provider_mapping = request_spec.get_request_group_mapping()
return self._create_and_bind_arqs(
context, instance.uuid, instance.flavor.extra_specs,
host, resource_provider_mapping)
def _validate_image_traits_for_rebuild(self, context, instance, image_ref):
"""Validates that the traits specified in the image can be satisfied
by the providers of the current allocations for the instance during
@ -1668,7 +1678,7 @@ class ComputeTaskManager(base.Base):
try:
accel_uuids = self._create_and_bind_arq_for_instance(
context, instance, host, request_spec)
context, instance, host.nodename, request_spec)
except Exception as exc:
with excutils.save_and_reraise_exception():
self._cleanup_build_artifacts(

View File

@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 53
SERVICE_VERSION = 54
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -190,6 +190,10 @@ SERVICE_VERSION_HISTORY = (
# Version 53: Compute RPC v5.12:
# Add accel_uuids (accelerator requests) param to rebuild_instance
{'compute_rpc': '5.12'},
# Version 54: Compute RPC v5.13:
# Add accel_uuids (accelerator requests) param to shelve_instance and
# shelve_offload_instance and unshelve_instance
{'compute_rpc': '5.13'},
)
# This is used to raise an error at service startup if older than N-1 computes

View File

@ -3095,7 +3095,10 @@ class CyborgFixture(fixtures.Fixture):
This function uses bindings indexed by instance UUID to
populate the bound ARQ templates in CyborgFixture.bound_arq_list.
"""
arq_host_rp_list = CyborgFixture.bindings_by_instance[instance_uuid]
arq_host_rp_list = CyborgFixture.bindings_by_instance.get(
instance_uuid)
if not arq_host_rp_list:
return []
# The above looks like:
# [{'hostname': $hostname,
# 'device_rp_uuid': $device_rp_uuid,
@ -3117,7 +3120,7 @@ class CyborgFixture(fixtures.Fixture):
@staticmethod
def fake_delete_arqs_for_instance(instance_uuid):
return None
CyborgFixture.bindings_by_instance.pop(instance_uuid, None)
def setUp(self):
super(CyborgFixture, self).setUp()

View File

@ -486,9 +486,17 @@ class InstanceHelperMixin:
self.api.post_server_action(server['id'], {'shelve': {}})
return self._wait_for_state_change(server, expected_state)
def _shelve_offload_server(self, server):
"""Shelve offload a server."""
self.api.post_server_action(server['id'], {'shelveOffload': {}})
self._wait_for_server_parameter(server,
{'status': 'SHELVED_OFFLOADED',
'OS-EXT-SRV-ATTR:host': None,
'OS-EXT-AZ:availability_zone': ''})
def _unshelve_server(self, server, expected_state='ACTIVE'):
"""Unshelve a server."""
self.api.post_server_action(server['id'], {'unshelve': {}})
self.api.post_server_action(server['id'], {'unshelve': None})
return self._wait_for_state_change(server, expected_state)
def _evacuate_server(

View File

@ -33,6 +33,7 @@ from oslo_utils import timeutils
from nova.compute import api as compute_api
from nova.compute import instance_actions
from nova.compute import manager as compute_manager
from nova.compute import rpcapi as compute_rpcapi
from nova import context
from nova import exception
from nova.network import constants
@ -7890,6 +7891,29 @@ class AcceleratorServerOpsTest(AcceleratorServerBase):
self._wait_for_state_change(self.server, 'ACTIVE')
self._check_allocations_usage(self.server)
def test_shelve_and_unshelve_ok(self):
self.flags(shelved_offload_time=1)
arqs = self.cyborg.fake_get_arqs_for_instance(self.server['id'])
self.assertEqual(len(arqs), 1)
self._shelve_server(self.server, 'SHELVED')
arqs = self.cyborg.fake_get_arqs_for_instance(self.server['id'])
self.assertEqual(len(arqs), 1)
self._unshelve_server(self.server)
arqs = self.cyborg.fake_get_arqs_for_instance(self.server['id'])
self.assertEqual(len(arqs), 1)
def test_shelve_offload_and_unshelve_ok(self):
self.flags(shelved_offload_time=-1)
arqs = self.cyborg.fake_get_arqs_for_instance(self.server['id'])
self.assertEqual(len(arqs), 1)
self._shelve_server(self.server, 'SHELVED')
self._shelve_offload_server(self.server)
arqs = self.cyborg.fake_get_arqs_for_instance(self.server['id'])
self.assertEqual(len(arqs), 0)
self._unshelve_server(self.server)
arqs = self.cyborg.fake_get_arqs_for_instance(self.server['id'])
self.assertEqual(len(arqs), 1)
def test_resize_fails(self):
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],
@ -7949,6 +7973,81 @@ class AcceleratorServerOpsTest(AcceleratorServerBase):
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
@mock.patch.object(objects.service, 'get_minimum_version_all_cells')
def test_shelve_old_compute(self, old_compute_version):
"""Tests when the source compute service is too old to call
shelve so OpenStackApiException is raised.
"""
old_compute_version.return_value = 53
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],
{'shelve': {}})
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
def _test_shelve_instance_with_compute_rpc_pin(
self, version_cap, body=None):
self.flags(compute=version_cap, group='upgrade_levels')
self.flags(shelved_offload_time=-1)
self.api.post_server_action(self.server['id'], body)
self._wait_for_state_change(self.server, 'SHELVED')
def test_shelve_with_compute_rpc_pin_5_0(self):
self.flags(compute=5.0, group='upgrade_levels')
compute_rpcapi.reset_globals()
ex = self.assertRaises(
client.OpenStackApiException, self.api.post_server_action,
self.server['id'], {'shelve': {}})
self.assertEqual(403, ex.response.status_code)
def test_shelve_instance_5_13(self):
body = {'shelve': {}}
self._test_shelve_instance_with_compute_rpc_pin(
'5.13', body=body)
def _test_shelve_offload_instance_with_compute_rpc_pin(self, version_cap):
self.flags(compute=version_cap, group='upgrade_levels')
self.flags(shelved_offload_time=2)
self.api.post_server_action(self.server['id'], {'shelve': {}})
self._wait_for_state_change(self.server, 'SHELVED')
self.api.post_server_action(self.server['id'], {'shelveOffload': {}})
self._wait_for_state_change(self.server, 'SHELVED_OFFLOADED')
def test_shelve_offload_instance_5_0(self):
self._test_shelve_offload_instance_with_compute_rpc_pin('5.0')
def test_shelve_offload_instance_5_13(self):
self._test_shelve_offload_instance_with_compute_rpc_pin('5.13')
def _test_unshelve_instance_with_compute_rpc_pin(
self, version_cap, body=None):
self.flags(compute=version_cap, group='upgrade_levels')
self.api.microversion = '2.87'
self.api.post_server_action(self.server['id'], {'shelve': {}})
self._wait_for_state_change(self.server, 'SHELVED_OFFLOADED')
self.api.post_server_action(self.server['id'], body)
self._wait_for_state_change(self.server, 'ACTIVE')
def test_unshelve_instance_5_0(self):
body = {'unshelve': None}
self._test_unshelve_instance_with_compute_rpc_pin(
'5.0', body=body)
def test_unshelve_instance_5_2(self):
body = {'unshelve': None}
self._test_unshelve_instance_with_compute_rpc_pin(
'5.2', body=body)
def test_unshelve_instance_5_13(self):
body = {'unshelve': None}
self._test_unshelve_instance_with_compute_rpc_pin(
'5.13', body=body)
class CrossCellResizeWithQoSPort(PortResourceRequestBasedSchedulingTestBase):
NUMBER_OF_CELLS = 2

View File

@ -7637,6 +7637,127 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
# myfunc was not called
self.assertEqual({}, args_info)
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
'get_arq_uuids_for_instance')
@mock.patch.object(compute_utils, 'create_image')
def test_get_arqs_in_shelve(self, mock_create_img, mock_get_arq_uuids):
"""Test get_arq_uuids_for_instance() was called if the
'accel:device_profile' exist in the flavor for shelve operation.
"""
extra_specs = {'accel:device_profile': 'mydp'}
flavor = self._create_flavor(extra_specs=extra_specs)
params = dict(display_name="vm1")
instance = self._create_instance_obj(params=params, flavor=flavor)
mock_create_img.return_value = dict(id='fake-image-id')
mock_get_arq_uuids.return_value = (
['983d1742-c8ca-4f8a-9f3c-3f54bbafaa7d'])
accel_uuids = mock_get_arq_uuids.return_value
with test.nested(
mock.patch('nova.objects.service.get_minimum_version_all_cells',
return_value=54),
mock.patch('nova.compute.utils.is_volume_backed_instance',
return_value=False),
mock.patch.object(objects.Instance, 'save'),
mock.patch.object(self.compute_api, '_record_action_start'),
mock.patch.object(compute_rpcapi.ComputeAPI, 'shelve_instance'),
) as (
mock_get_min, mock_volume_backend,
mock_instance_save, mock_record_action, mock_shelve_service
):
self.compute_api.shelve(self.context, instance)
mock_get_arq_uuids.assert_called_once()
mock_shelve_service.assert_called_once_with(
self.context, instance=instance, image_id='fake-image-id',
clean_shutdown=True, accel_uuids=accel_uuids)
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
'get_arq_uuids_for_instance')
@mock.patch.object(compute_utils, 'create_image')
def test_shelve_with_unsupport_accelerators(
self, mock_create_img, mock_get_arq_uuids):
"""Test get_arq_uuids_for_instance() was called if the
'accel:device_profile' exist in the flavor for shelve operation.
"""
extra_specs = {'accel:device_profile': 'mydp'}
flavor = self._create_flavor(extra_specs=extra_specs)
params = dict(display_name="vm1")
instance = self._create_instance_obj(params=params, flavor=flavor)
with test.nested(
mock.patch('nova.objects.service.get_minimum_version_all_cells',
return_value=54),
mock.patch('nova.compute.utils.is_volume_backed_instance',
return_value=False),
mock.patch.object(objects.Instance, 'save'),
mock.patch.object(self.compute_api, '_record_action_start'),
mock.patch.object(compute_rpcapi.ComputeAPI, 'shelve_instance'),
) as (
mock_get_min, mock_volume_backend,
mock_instance_save, mock_record_action, mock_shelve_service
):
mock_shelve_service.side_effect = (
exception.ForbiddenWithAccelerators(
oldest_supported_version=54,
scope=mock.ANY,
min_service_level=54,
oldest_supported_service=54))
self.assertRaises(
exception.ForbiddenWithAccelerators,
self.compute_api.shelve,
self.context,
instance)
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
'get_arq_uuids_for_instance')
@mock.patch('nova.objects.service.get_minimum_version_all_cells',
return_value=53)
def test_shelve_until_service_forbidden(self, mock_get_min,
mock_get_arq_uuuids):
"""Ensure a 'ForbiddenWithAccelerators' exception raises if any
compute service less than the version of 54.
"""
extra_specs = {'accel:device_profile': 'mydp'}
flavor = self._create_flavor(extra_specs=extra_specs)
instance = self._create_instance_obj(flavor=flavor)
self.assertRaisesRegex(exception.ForbiddenWithAccelerators,
'Forbidden with instances that have accelerators.',
self.compute_api.shelve, self.context, instance)
mock_get_arq_uuuids.assert_not_called()
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
'get_arq_uuids_for_instance')
@mock.patch.object(compute_utils, 'create_image')
def test_get_arqs_in_shelve_offload(self, mock_create_img,
mock_get_arq_uuids):
"""Test get_arq_uuids_for_instance() was called if the
'accel:device_profile' exist in the flavor for
shelve offload operation.
"""
extra_specs = {'accel:device_profile': 'mydp'}
flavor = self._create_flavor(extra_specs=extra_specs)
params = {'vm_state': vm_states.SHELVED}
instance = self._create_instance_obj(params=params, flavor=flavor)
mock_create_img.return_value = dict(id='fake-image-id')
mock_get_arq_uuids.return_value = [uuids.fake]
accel_uuids = mock_get_arq_uuids.return_value
with test.nested(
mock.patch('nova.objects.service.get_minimum_version_all_cells',
return_value=54),
mock.patch.object(objects.Instance, 'save'),
mock.patch.object(self.compute_api, '_record_action_start'),
mock.patch.object(compute_rpcapi.ComputeAPI,
'shelve_offload_instance'),
) as (
mock_get_min, mock_instance_save,
mock_record_action, mock_shelve_service
):
self.compute_api.shelve_offload(self.context, instance)
mock_get_arq_uuids.assert_called_once()
mock_shelve_service.assert_called_once_with(
self.context, instance=instance, clean_shutdown=True,
accel_uuids=accel_uuids)
# TODO(huaqiang): Remove in Wallaby
@mock.patch('nova.objects.service.get_minimum_version_all_cells')
def test__check_compute_service_for_mixed_instance(self, mock_ver):

View File

@ -897,19 +897,67 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
def test_shelve_instance(self):
self._test_compute_api('shelve_instance', 'cast',
instance=self.fake_instance_obj, image_id='image_id',
clean_shutdown=True, version='5.0')
clean_shutdown=True, accel_uuids=None, version='5.13')
def test_shelve_instance_old_rpcapi(self):
# With rpcapi < 5.13, accel_uuids must be dropped in the client call.
ctxt = context.RequestContext('fake_user', 'fake_project')
compute_api = compute_rpcapi.ComputeAPI()
compute_api.router.client = mock.Mock()
mock_client = mock.MagicMock()
compute_api.router.client.return_value = mock_client
# Force can_send_version to False, so that 5.0 version is used.
mock_client.can_send_version.return_value = False
mock_cctx = mock.MagicMock()
mock_client.prepare.return_value = mock_cctx
compute_api.shelve_instance(
ctxt, instance=self.fake_instance_obj,
accel_uuids=[],
image_id='image_id', clean_shutdown=True)
mock_client.can_send_version.assert_called_once_with('5.13')
mock_client.prepare.assert_called_with(
server=self.fake_instance_obj.host, version='5.0')
mock_cctx.cast.assert_called_with( # No accel_uuids
ctxt, 'shelve_instance',
instance=self.fake_instance_obj,
image_id='image_id', clean_shutdown=True)
def test_shelve_offload_instance(self):
self._test_compute_api('shelve_offload_instance', 'cast',
instance=self.fake_instance_obj,
clean_shutdown=True, version='5.0')
clean_shutdown=True, accel_uuids=None, version='5.13')
def test_shelve_offload_instance_old_rpcapi(self):
# With rpcapi < 5.13, accel_uuids must be dropped in the client call.
ctxt = context.RequestContext('fake_user', 'fake_project')
compute_api = compute_rpcapi.ComputeAPI()
compute_api.router.client = mock.Mock()
mock_client = mock.MagicMock()
compute_api.router.client.return_value = mock_client
# Force can_send_version to False, so that 5.0 version is used.
mock_client.can_send_version.return_value = False
mock_cctx = mock.MagicMock()
mock_client.prepare.return_value = mock_cctx
compute_api.shelve_offload_instance(
ctxt, instance=self.fake_instance_obj,
accel_uuids=['938af7f9-f136-4e5a-bdbe-3b6feab54311'],
clean_shutdown=True,)
mock_client.can_send_version.assert_called_once_with('5.13')
mock_client.prepare.assert_called_with(
server=self.fake_instance_obj.host, version='5.0')
mock_cctx.cast.assert_called_with( # No accel_uuids
ctxt, 'shelve_offload_instance',
instance=self.fake_instance_obj,
clean_shutdown=True)
def test_unshelve_instance(self):
self._test_compute_api('unshelve_instance', 'cast',
instance=self.fake_instance_obj, host='host', image='image',
filter_properties={'fakeprop': 'fakeval'}, node='node',
request_spec=self.fake_request_spec_obj,
version='5.2')
request_spec=self.fake_request_spec_obj, accel_uuids=None,
version='5.13')
def test_cache_image(self):
self._test_compute_api('cache_images', 'call',
@ -943,7 +991,10 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
host='host', request_spec=self.fake_request_spec_obj,
image='image')
mock_client.can_send_version.assert_called_once_with('5.2')
mock_client.can_send_version.assert_has_calls([
mock.call('5.13'),
mock.call('5.2'),
])
mock_client.prepare.assert_called_with(
server='host', version='5.0')
mock_cctx.cast.assert_called_with(

View File

@ -10,6 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import eventlet
import mock
from oslo_utils import fixture as utils_fixture
from oslo_utils.fixture import uuidsentinel as uuids
@ -63,7 +64,8 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
mock_notify_instance_usage, mock_get_power_state,
mock_snapshot, mock_power_off, mock_terminate,
mock_get_bdms, clean_shutdown=True,
guest_power_state=power_state.RUNNING):
guest_power_state=power_state.RUNNING,
accel_uuids=None):
mock_get_power_state.return_value = 123
CONF.set_override('shelved_offload_time', shelved_offload_time)
@ -127,7 +129,8 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
mock_save.side_effect = check_save
self.compute.shelve_instance(self.context, instance,
image_id=image_id,
clean_shutdown=clean_shutdown)
clean_shutdown=clean_shutdown,
accel_uuids=accel_uuids)
mock_notify.assert_has_calls([
mock.call(self.context, instance, 'fake-mini',
action='shelve', phase='start', bdms=fake_bdms),
@ -181,6 +184,18 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
def test_shelve_paused_instance(self):
self._shelve_instance(-1, guest_power_state=power_state.PAUSED)
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
'delete_arqs_for_instance')
def test_shelve_and_offload_with_accel_uuids(self, mock_del_arqs):
self._shelve_instance(0, accel_uuids=[uuids.fake])
mock_del_arqs.assert_called_once()
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
'delete_arqs_for_instance')
def test_shelve_and_with_accel_uuids(self, mock_del_arqs):
self._shelve_instance(-1, accel_uuids=[uuids.fake])
mock_del_arqs.assert_not_called()
@mock.patch.object(nova.virt.fake.SmallFakeDriver, 'power_off')
def test_shelve_offload(self, mock_power_off):
instance = self._shelve_offload()
@ -279,10 +294,12 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
mock_get_power_state, mock_spawn,
mock_prep_block_device, mock_notify_instance_usage,
mock_notify_instance_action,
mock_get_bdms):
mock_get_bdms,
accel_uuids=None,
instance=None):
mock_bdms = mock.Mock()
mock_get_bdms.return_value = mock_bdms
instance = self._create_fake_instance_obj()
instance = instance or self._create_fake_instance_obj()
instance.task_state = task_states.UNSHELVING
instance.save()
image = {'id': uuids.image_id}
@ -340,7 +357,8 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
self.compute.unshelve_instance(
self.context, instance, image=image,
filter_properties=filter_properties,
node=node, request_spec=objects.RequestSpec())
node=node, request_spec=objects.RequestSpec(),
accel_uuids=accel_uuids)
mock_notify_instance_action.assert_has_calls([
mock.call(self.context, instance, 'fake-mini',
@ -362,7 +380,7 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
mock_spawn.assert_called_once_with(self.context, instance,
test.MatchType(objects.ImageMeta), injected_files=[],
admin_password=None, allocations={}, network_info=[],
block_device_info='fake_bdm')
block_device_info='fake_bdm', accel_info=mock.ANY)
self.mock_get_allocations.assert_called_once_with(self.context,
instance.uuid)
mock_get_power_state.assert_called_once_with(instance)
@ -380,6 +398,57 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
self.assertEqual(self.compute.host, instance.host)
self.assertFalse(instance.auto_disk_config)
@mock.patch.object(nova.compute.manager.ComputeManager,
'_get_bound_arq_resources')
def test_unshelve_with_arqs(self, mock_get_arqs):
self.test_unshelve(accel_uuids=[uuids.fake])
mock_get_arqs.assert_called_once()
@mock.patch.object(nova.compute.manager.ComputeManager,
'_nil_out_instance_obj_host_and_node')
@mock.patch.object(nova.compute.manager.ComputeManager,
'_terminate_volume_connections')
@mock.patch.object(nova.compute.manager.ComputeManager,
'_build_resources_cleanup')
@mock.patch.object(nova.compute.manager.ComputeManager,
'_get_bound_arq_resources')
def test_unshelve_with_arqs_failes_cleanup_resources(
self, mock_get_arqs, mock_cleanup_resources,
mock_bdms, mock_nil):
instance = self._create_fake_instance_obj()
mock_get_arqs.side_effect = exception.AcceleratorRequestOpFailed(
instance_uuid=instance.uuid, op='get',
msg='Failure getting accelerator requests.')
exc = self.assertRaises(exception.AcceleratorRequestOpFailed,
self.test_unshelve,
accel_uuids=[uuids.fake],
instance=instance)
self.assertIn('Failure getting accelerator requests.', str(exc))
mock_cleanup_resources.assert_called_once()
@mock.patch.object(nova.compute.manager.ComputeManager,
'_nil_out_instance_obj_host_and_node')
@mock.patch.object(nova.compute.manager.ComputeManager,
'_terminate_volume_connections')
@mock.patch.object(nova.compute.manager.ComputeManager,
'_build_resources_cleanup')
@mock.patch.object(nova.compute.manager.ComputeManager,
'_get_bound_arq_resources')
def test_unshelve_with_arqs_failes_cleanup_resources_timeout(
self, mock_get_arqs, mock_cleanup_resources,
mock_bdms, mock_nil):
instance = self._create_fake_instance_obj()
mock_get_arqs.side_effect = eventlet.timeout.Timeout()
self.assertRaises(eventlet.timeout.Timeout,
self.test_unshelve,
accel_uuids=[uuids.fake],
instance=instance)
mock_cleanup_resources.assert_called_once()
@mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
@mock.patch('nova.compute.utils.notify_about_instance_action')
@mock.patch.object(nova.compute.resource_tracker.ResourceTracker,
@ -464,7 +533,7 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
{}, limits)
mock_spawn.assert_called_once_with(self.context, instance,
test.MatchType(objects.ImageMeta),
injected_files=[], admin_password=None,
injected_files=[], admin_password=None, accel_info=[],
allocations={}, network_info=[], block_device_info='fake_bdm')
self.mock_get_allocations.assert_called_once_with(self.context,
instance.uuid)
@ -551,7 +620,7 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
{}, limits)
mock_spawn.assert_called_once_with(
self.context, instance, test.MatchType(objects.ImageMeta),
injected_files=[], admin_password=None,
injected_files=[], admin_password=None, accel_info=[],
allocations={}, network_info=[], block_device_info='fake_bdm')
mock_terminate_volume_connections.assert_called_once_with(
self.context, instance, mock_bdms)
@ -767,11 +836,11 @@ class ShelveComputeAPITestCase(test_compute.BaseTestCase):
if boot_from_volume:
rpcapi_shelve_offload_instance.assert_called_once_with(
self.context, instance=instance,
clean_shutdown=clean_shutdown)
clean_shutdown=clean_shutdown, accel_uuids=[])
else:
rpcapi_shelve_instance.assert_called_once_with(
self.context, instance=instance, image_id='fake-image-id',
clean_shutdown=clean_shutdown)
clean_shutdown=clean_shutdown, accel_uuids=[])
db.instance_destroy(self.context, instance['uuid'])
@ -831,7 +900,7 @@ class ShelveComputeAPITestCase(test_compute.BaseTestCase):
instance_save.assert_called_once_with(expected_task_state=[None])
rpcapi_shelve_offload_instance.assert_called_once_with(
self.context, instance=fake_instance,
clean_shutdown=clean_shutdown)
clean_shutdown=clean_shutdown, accel_uuids=[])
record.assert_called_once_with(self.context, fake_instance,
instance_actions.SHELVE_OFFLOAD)

View File

@ -1340,6 +1340,7 @@ class _BaseTaskTestCase(object):
system_metadata['shelved_host'] = 'fake-mini'
fake_spec = fake_request_spec.fake_spec_obj()
fake_spec.flavor = instance.flavor
# FIXME(sbauza): Modify the fake RequestSpec object to either add a
# non-empty SchedulerRetries object or nullify the field
fake_spec.retry = None
@ -1409,6 +1410,7 @@ class _BaseTaskTestCase(object):
unshelve_instance.assert_called_once_with(
self.context, mock.ANY, host['host'],
test.MatchType(objects.RequestSpec), image=mock.ANY,
accel_uuids=[],
filter_properties=filter_properties, node=host['nodename']
)
@ -1450,6 +1452,7 @@ class _BaseTaskTestCase(object):
instance.task_state = task_states.UNSHELVING
# 'shelved_image_id' is None for volumebacked instance
instance.system_metadata['shelved_image_id'] = None
self.request_spec.flavor = instance.flavor
with test.nested(
mock.patch.object(self.conductor_manager,
@ -1487,6 +1490,7 @@ class _BaseTaskTestCase(object):
mock_im.return_value = objects.InstanceMapping(
cell_mapping=cell_mapping)
instance = self._create_fake_instance_obj()
fake_spec.flavor = instance.flavor
instance.vm_state = vm_states.SHELVED_OFFLOADED
instance.save()
system_metadata = instance.system_metadata
@ -1506,6 +1510,7 @@ class _BaseTaskTestCase(object):
filter_properties=dict(
# populate_filter_properties adds limits={}
fake_spec.to_legacy_filter_properties_dict(), limits={}),
accel_uuids=[],
node='fake_node')
def test_unshelve_instance_schedule_and_rebuild_novalid_host(self):
@ -1513,6 +1518,7 @@ class _BaseTaskTestCase(object):
instance.vm_state = vm_states.SHELVED_OFFLOADED
instance.save()
system_metadata = instance.system_metadata
self.request_spec.flavor = instance.flavor
def fake_schedule_instances(context, request_spec, *instances,
**kwargs):
@ -1559,6 +1565,7 @@ class _BaseTaskTestCase(object):
system_metadata['shelved_at'] = timeutils.utcnow()
system_metadata['shelved_image_id'] = 'fake_image_id'
system_metadata['shelved_host'] = 'fake-mini'
self.request_spec.flavor = instance.flavor
self.assertRaises(messaging.MessagingTimeout,
self.conductor_manager.unshelve_instance,
self.context, instance, self.request_spec)
@ -1582,6 +1589,7 @@ class _BaseTaskTestCase(object):
cell_mapping=objects.CellMapping.get_by_uuid(self.context,
uuids.cell1))
instance = self._create_fake_instance_obj()
fake_spec.flavor = instance.flavor
instance.vm_state = vm_states.SHELVED_OFFLOADED
instance.save()
system_metadata = instance.system_metadata
@ -1594,7 +1602,7 @@ class _BaseTaskTestCase(object):
self.context, fake_spec, [instance.uuid], return_alternates=False)
mock_unshelve.assert_called_once_with(
self.context, instance, 'fake_host', fake_spec, image=None,
filter_properties={'limits': {}}, node='fake_node')
accel_uuids=[], filter_properties={'limits': {}}, node='fake_node')
@mock.patch('nova.scheduler.utils.fill_provider_mapping')
@mock.patch('nova.network.neutron.API.get_requested_resource_for_instance')
@ -1607,6 +1615,7 @@ class _BaseTaskTestCase(object):
instance.save()
request_spec = objects.RequestSpec()
request_spec.flavor = instance.flavor
selection = objects.Selection(
service_host='fake_host',
@ -1629,6 +1638,87 @@ class _BaseTaskTestCase(object):
mock_fill_provider_mapping.assert_called_once_with(
request_spec, selection)
@mock.patch('nova.accelerator.cyborg.get_device_profile_request_groups')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_create_and_bind_arq_for_instance', )
@mock.patch('nova.scheduler.utils.fill_provider_mapping')
@mock.patch('nova.network.neutron.API.get_requested_resource_for_instance')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_schedule_instances', )
def test_unshelve_instance_resource_request_with_device_profile(
self, mock_schedule, mock_get_res_req, mock_fill_provider_mapping,
mock_get_arq, mock_get_dp):
instance = self._create_fake_instance_obj()
instance.vm_state = vm_states.SHELVED_OFFLOADED
instance.save()
request_spec = objects.RequestSpec()
request_spec.flavor = instance.flavor
request_spec.flavor.extra_specs = {'accel:device_profile': 'mydp'}
selection = objects.Selection(
service_host='fake_host',
nodename='fake_node',
limits=None)
mock_schedule.return_value = [[selection]]
mock_get_res_req.return_value = []
dp_groups = [objects.RequestGroup(requester_id='deviceprofile2'),
objects.RequestGroup(requester_id='deviceprofile3')]
mock_get_dp.return_value = dp_groups
mock_get_arq.return_value = []
self.conductor_manager.unshelve_instance(
self.context, instance, request_spec)
self.assertEqual(dp_groups, request_spec.requested_resources)
mock_get_res_req.assert_called_once_with(self.context, instance.uuid)
mock_schedule.assert_called_once_with(
self.context, request_spec, [instance.uuid],
return_alternates=False)
mock_fill_provider_mapping.assert_called_once_with(
request_spec, selection)
@mock.patch('nova.accelerator.cyborg._CyborgClient.delete_arqs_by_uuid')
@mock.patch('nova.accelerator.cyborg.get_device_profile_request_groups')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_create_and_bind_arq_for_instance', )
@mock.patch('nova.scheduler.utils.fill_provider_mapping')
@mock.patch('nova.network.neutron.API.get_requested_resource_for_instance')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_schedule_instances', )
def test_unshelve_instance_resource_request_with_arq_bind_fail(
self, mock_schedule, mock_get_res_req, mock_fill_provider_mapping,
mock_get_arqs, mock_get_dp, mock_del_arqs):
instance = self._create_fake_instance_obj()
instance.vm_state = vm_states.SHELVED_OFFLOADED
instance.save()
request_spec = objects.RequestSpec()
request_spec.flavor = instance.flavor
request_spec.flavor.extra_specs = {'accel:device_profile': 'mydp'}
selection = objects.Selection(
service_host='fake_host',
nodename='fake_node',
limits=None)
mock_schedule.return_value = [[selection]]
dp_groups = [objects.RequestGroup(requester_id='deviceprofile2'),
objects.RequestGroup(requester_id='deviceprofile3')]
mock_get_dp.return_value = dp_groups
mock_get_res_req.return_value = []
arqs = ["fake-arq-uuid"]
mock_get_arqs.side_effect = exc.AcceleratorRequestBindingFailed(
arqs=arqs, msg='')
ex = self.assertRaises(exc.AcceleratorRequestBindingFailed,
self.conductor_manager.unshelve_instance,
context=self.context, instance=instance,
request_spec=request_spec)
self.assertIn('Failed to bind accelerator requests', ex.message)
mock_del_arqs.assert_called_with(arqs)
def test_rebuild_instance(self):
inst_obj = self._create_fake_instance_obj()
rebuild_args, compute_args = self._prepare_rebuild_args(
@ -1698,6 +1788,57 @@ class _BaseTaskTestCase(object):
self.context, inst_obj, 'thebesthost', action='rebuild_scheduled',
source='nova-conductor')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
'get_arq_uuids_for_instance')
@mock.patch('nova.compute.utils.notify_about_instance_rebuild')
def test_rebuild_instance_with_device_profile(self, mock_notify,
mock_get_arqs):
inst_obj = self._create_fake_instance_obj()
inst_obj.flavor.extra_specs = {'accel:device_profile': 'mydp'}
inst_obj.host = 'noselect'
expected_host = 'thebesthost'
expected_node = 'thebestnode'
expected_limits = None
fake_selection = objects.Selection(service_host=expected_host,
nodename=expected_node, limits=None)
rebuild_args, compute_args = self._prepare_rebuild_args(
{'host': None, 'node': expected_node, 'limits': expected_limits})
fake_spec = objects.RequestSpec()
rebuild_args['request_spec'] = fake_spec
inst_uuids = [inst_obj.uuid]
arqs = ['fake-arq-uuid']
mock_get_arqs.return_value = arqs
with test.nested(
mock.patch.object(self.conductor_manager.compute_rpcapi,
'rebuild_instance'),
mock.patch.object(scheduler_utils, 'setup_instance_group',
return_value=False),
mock.patch.object(self.conductor_manager.query_client,
'select_destinations',
return_value=[[fake_selection]])
) as (rebuild_mock, sig_mock, select_dest_mock):
self.conductor_manager.rebuild_instance(context=self.context,
instance=inst_obj,
**rebuild_args)
self.ensure_network_metadata_mock.assert_called_once_with(
inst_obj)
self.heal_reqspec_is_bfv_mock.assert_called_once_with(
self.context, fake_spec, inst_obj)
select_dest_mock.assert_called_once_with(self.context, fake_spec,
inst_uuids, return_objects=True, return_alternates=False)
compute_args['host'] = expected_host
compute_args['request_spec'] = fake_spec
compute_args['accel_uuids'] = arqs
rebuild_mock.assert_called_once_with(self.context,
instance=inst_obj,
**compute_args)
self.assertEqual(inst_obj.project_id, fake_spec.project_id)
self.assertEqual('compute.instance.rebuild.scheduled',
fake_notifier.NOTIFICATIONS[0].event_type)
mock_notify.assert_called_once_with(
self.context, inst_obj, 'thebesthost', action='rebuild_scheduled',
source='nova-conductor')
def test_rebuild_instance_with_scheduler_no_host(self):
inst_obj = self._create_fake_instance_obj()
inst_obj.host = 'noselect'
@ -1896,6 +2037,151 @@ class _BaseTaskTestCase(object):
self.context, inst_obj, 'thebesthost', action='rebuild_scheduled',
source='nova-conductor')
@mock.patch(
'nova.accelerator.cyborg._CyborgClient.delete_arqs_for_instance')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_create_and_bind_arq_for_instance', )
@mock.patch('nova.accelerator.cyborg.get_device_profile_request_groups')
@mock.patch('nova.compute.utils.notify_about_instance_rebuild')
def test_evacuate_instance_with_request_spec_device_profile(
self, mock_notify, mock_get_dp, mock_get_arqs, mock_del_arqs):
inst_obj = self._create_fake_instance_obj()
inst_obj.host = 'noselect'
expected_host = 'thebesthost'
expected_node = 'thebestnode'
expected_limits = None
fake_selection = objects.Selection(service_host=expected_host,
nodename=expected_node, limits=None)
fake_spec = objects.RequestSpec(ignore_hosts=[uuids.ignored_host])
fake_spec.flavor = inst_obj.flavor
fake_spec.flavor.extra_specs = {'accel:device_profile': 'mydp'}
rebuild_args, compute_args = self._prepare_rebuild_args(
{'host': None, 'node': expected_node, 'limits': expected_limits,
'request_spec': fake_spec, 'recreate': True})
dp_groups = [objects.RequestGroup(requester_id='deviceprofile2'),
objects.RequestGroup(requester_id='deviceprofile3')]
mock_get_dp.return_value = dp_groups
arqs = ['fake-arq-uuid']
mock_get_arqs.return_value = arqs
with test.nested(
mock.patch.object(self.conductor_manager.compute_rpcapi,
'rebuild_instance'),
mock.patch.object(scheduler_utils, 'setup_instance_group',
return_value=False),
mock.patch.object(self.conductor_manager.query_client,
'select_destinations',
return_value=[[fake_selection]]),
mock.patch.object(fake_spec, 'reset_forced_destinations'),
mock.patch('nova.scheduler.utils.fill_provider_mapping'),
mock.patch('nova.network.neutron.API.'
'get_requested_resource_for_instance',
return_value=[])
) as (rebuild_mock, sig_mock, select_dest_mock, reset_fd,
fill_rp_mapping_mock, get_req_res_mock):
self.conductor_manager.rebuild_instance(context=self.context,
instance=inst_obj,
**rebuild_args)
reset_fd.assert_called_once_with()
# The RequestSpec.ignore_hosts field should be overwritten.
self.assertEqual([inst_obj.host], fake_spec.ignore_hosts)
# The RequestSpec.requested_destination.cell field should be set.
self.assertIn('requested_destination', fake_spec)
self.assertIn('cell', fake_spec.requested_destination)
self.assertIsNotNone(fake_spec.requested_destination.cell)
select_dest_mock.assert_called_once_with(self.context,
fake_spec, [inst_obj.uuid], return_objects=True,
return_alternates=False)
compute_args['host'] = expected_host
compute_args['request_spec'] = fake_spec
compute_args['accel_uuids'] = arqs
rebuild_mock.assert_called_once_with(self.context,
instance=inst_obj,
**compute_args)
get_req_res_mock.assert_called_once_with(
self.context, inst_obj.uuid)
fill_rp_mapping_mock.assert_called_once_with(
fake_spec, fake_selection)
mock_del_arqs.assert_called_once()
self.assertEqual('compute.instance.rebuild.scheduled',
fake_notifier.NOTIFICATIONS[0].event_type)
mock_notify.assert_called_once_with(
self.context, inst_obj, 'thebesthost', action='rebuild_scheduled',
source='nova-conductor')
@mock.patch('nova.accelerator.cyborg._CyborgClient.delete_arqs_by_uuid')
@mock.patch(
'nova.accelerator.cyborg._CyborgClient.delete_arqs_for_instance')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_create_and_bind_arq_for_instance', )
@mock.patch('nova.accelerator.cyborg.get_device_profile_request_groups')
@mock.patch('nova.compute.utils.notify_about_instance_rebuild')
def test_evacuate_instance_with_request_spec_arq_bind_fail(
self, mock_notify, mock_get_dp, mock_get_arqs,
mock_del_arqs_instance, mock_del_arqs):
inst_obj = self._create_fake_instance_obj()
inst_obj.host = 'noselect'
expected_host = 'thebesthost'
expected_node = 'thebestnode'
expected_limits = None
fake_selection = objects.Selection(service_host=expected_host,
nodename=expected_node, limits=None)
fake_spec = objects.RequestSpec(ignore_hosts=[uuids.ignored_host])
fake_spec.flavor = inst_obj.flavor
fake_spec.flavor.extra_specs = {'accel:device_profile': 'mydp'}
dp_groups = [objects.RequestGroup(requester_id='deviceprofile2'),
objects.RequestGroup(requester_id='deviceprofile3')]
mock_get_dp.return_value = dp_groups
fake_spec.requested_resources = dp_groups
rebuild_args, compute_args = self._prepare_rebuild_args(
{'host': None, 'node': expected_node, 'limits': expected_limits,
'request_spec': fake_spec, 'recreate': True})
arqs = ['fake-arq-uuid']
mock_get_arqs.side_effect = exc.AcceleratorRequestBindingFailed(
arqs=arqs, msg='')
with test.nested(
mock.patch.object(scheduler_utils, 'setup_instance_group',
return_value=False),
mock.patch.object(self.conductor_manager.query_client,
'select_destinations',
return_value=[[fake_selection]]),
mock.patch.object(fake_spec, 'reset_forced_destinations'),
mock.patch('nova.scheduler.utils.fill_provider_mapping'),
mock.patch('nova.network.neutron.API.'
'get_requested_resource_for_instance',
return_value=[])
) as (sig_mock, select_dest_mock, reset_fd,
fill_rp_mapping_mock, get_req_res_mock):
ex = self.assertRaises(exc.AcceleratorRequestBindingFailed,
self.conductor_manager.rebuild_instance,
context=self.context, instance=inst_obj,
**rebuild_args)
reset_fd.assert_called_once_with()
# The RequestSpec.ignore_hosts field should be overwritten.
self.assertEqual([inst_obj.host], fake_spec.ignore_hosts)
# The RequestSpec.requested_destination.cell field should be set.
self.assertIn('requested_destination', fake_spec)
self.assertIn('cell', fake_spec.requested_destination)
self.assertIsNotNone(fake_spec.requested_destination.cell)
select_dest_mock.assert_called_once_with(self.context,
fake_spec, [inst_obj.uuid], return_objects=True,
return_alternates=False)
compute_args['host'] = expected_host
compute_args['request_spec'] = fake_spec
get_req_res_mock.assert_called_once_with(
self.context, inst_obj.uuid)
fill_rp_mapping_mock.assert_called_once_with(
fake_spec, fake_selection)
self.assertIn('Failed to bind accelerator requests', ex.message)
mock_del_arqs.assert_called_with(arqs)
mock_del_arqs_instance.assert_called_once()
self.assertEqual('compute.instance.rebuild.scheduled',
fake_notifier.NOTIFICATIONS[0].event_type)
mock_notify.assert_called_once_with(
self.context, inst_obj, 'thebesthost', action='rebuild_scheduled',
source='nova-conductor')
@mock.patch(
'nova.conductor.tasks.cross_cell_migrate.ConfirmResizeTask.execute')
def test_confirm_snapshot_based_resize(self, mock_execute):

View File

@ -0,0 +1,12 @@
---
features:
- |
Add Cyborg shelve/unshelve support.
After shelve, the ARQs are kept bound to the instance.
After shelve offload, the ARQs of the instance will be freed
in Cyborg.
During unshelve, the ARQs will be reallocated and bound to
the instance if needed.
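
Purely as an illustration of the lifecycle described above (the
``arqs_after`` helper is invented and is not part of nova or Cyborg), the
intended behaviour can be summarized as:

    def arqs_after(operation, bound_arqs, device_profile_groups):
        """Sketch: the instance's ARQs after each shelve-related operation."""
        if operation == 'shelve':
            return list(bound_arqs)    # ARQs stay bound to the instance
        if operation == 'shelve_offload':
            return []                  # ARQs are freed in Cyborg
        if operation == 'unshelve':
            # ARQs are re-created and bound, one per device profile group
            return ['arq-%d' % i for i, _ in enumerate(device_profile_groups)]
        return list(bound_arqs)


    assert arqs_after('shelve_offload', ['arq-0'], []) == []
    assert len(arqs_after('unshelve', [], ['group-0', 'group-1'])) == 2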