Add support for volume backed server rebuild

This patch adds the plumbing for rebuilding a volume backed
instance in compute code. This functionality will be enabled
in a subsequent patch which adds a new microversion and the
external support for requesting it.

The flow of the operation is as follows:

1) Create an empty attachment
2) Detach the volume
3) Request cinder to reimage the volume
4) Wait for cinder to notify success to nova (via external events)
5) Update and complete the attachment
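
A rough sketch of how these steps map onto the cinder volume API calls used
in the diff below (error handling and the external-event wait are stripped
out; the helper function and its arguments are illustrative only):

from oslo_serialization import jsonutils

def reimage_root_volume(context, volume_api, driver, instance, root_bdm,
                        old_attachment_id, image_id):
    # 1) Create an empty attachment so the volume stays reserved once the
    #    old attachment goes away.
    new_attachment_id = volume_api.attachment_create(
        context, root_bdm.volume_id, instance.uuid)['id']
    # 2) Detach the volume from the guest and switch the BDM over to the
    #    new attachment before dropping the old one.
    connection_info = jsonutils.loads(root_bdm.connection_info)
    driver.detach_volume(context, connection_info, instance,
                         root_bdm.device_name)
    root_bdm.attachment_id = new_attachment_id
    root_bdm.save()
    volume_api.attachment_delete(context, old_attachment_id)
    # 3) Ask cinder to rewrite the volume from the requested image.
    volume_api.reimage_volume(context, root_bdm.volume_id, image_id,
                              reimage_reserved=True)
    # 4) Nova then waits for the 'volume-reimaged' external event, and
    # 5) updates/completes the new attachment when the guest is respawned.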

Related blueprint volume-backed-server-rebuild

Change-Id: I0d889691de1af6875603a9f0f174590229e7be18
Author: Rajat Dhasmana (committed by whoami-rajat)
Date: 2021-12-03 05:20:26 -05:00
parent 0bab2e5a88
commit 30aab9c234
14 changed files with 464 additions and 52 deletions


@ -31,6 +31,7 @@ import contextlib
import copy
import functools
import inspect
import math
import sys
import time
import traceback
@ -615,7 +616,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='6.0')
target = messaging.Target(version='6.1')
def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
@ -3398,18 +3399,124 @@ class ComputeManager(manager.Manager):
migration.status = status
migration.save()
@staticmethod
def _reimage_failed_callback(event_name, instance):
msg = ('Cinder reported failure during reimaging '
'with %(event)s for instance %(uuid)s')
msg_args = {'event': event_name, 'uuid': instance.uuid}
LOG.error(msg, msg_args)
raise exception.ReimageException(msg % msg_args)
def _detach_root_volume(self, context, instance, root_bdm):
volume_id = root_bdm.volume_id
mp = root_bdm.device_name
old_connection_info = jsonutils.loads(root_bdm.connection_info)
try:
self.driver.detach_volume(context, old_connection_info,
instance, root_bdm.device_name)
except exception.DiskNotFound as err:
LOG.warning('Ignoring DiskNotFound exception while '
'detaching volume %(volume_id)s from '
'%(mp)s : %(err)s',
{'volume_id': volume_id, 'mp': mp,
'err': err}, instance=instance)
except exception.DeviceDetachFailed:
with excutils.save_and_reraise_exception():
LOG.warning('Guest refused to detach volume %(vol)s',
{'vol': volume_id}, instance=instance)
self.volume_api.roll_detaching(context, volume_id)
except Exception:
with excutils.save_and_reraise_exception():
LOG.exception('Failed to detach volume '
'%(volume_id)s from %(mp)s',
{'volume_id': volume_id, 'mp': mp},
instance=instance)
self.volume_api.roll_detaching(context, volume_id)
def _rebuild_volume_backed_instance(self, context, instance, bdms,
image_id):
# Get root bdm and attachment ID associated to it
root_bdm = compute_utils.get_root_bdm(context, instance, bdms)
old_attachment_id = root_bdm.attachment_id
# Create a new attachment and delete the previous attachment
# We create the new attachment first so that the volume stays in
# a reserved state after the old attachment is deleted, and to
# avoid any races between the attachment create and delete.
attachment_id = None
try:
attachment_id = self.volume_api.attachment_create(
context, root_bdm.volume_id, instance.uuid)['id']
self._detach_root_volume(context, instance, root_bdm)
root_bdm.attachment_id = attachment_id
root_bdm.save()
self.volume_api.attachment_delete(context,
old_attachment_id)
except exception.InstanceNotFound:
# This means we failed to save the new attachment because
# the instance is deleted, so (try to) delete it and abort.
try:
self.volume_api.attachment_delete(context,
attachment_id)
except cinder_exception.ClientException:
LOG.error('Failed to delete new attachment %s',
attachment_id)
msg = _('Failed to rebuild volume backed instance.')
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
except cinder_exception.ClientException:
if attachment_id:
LOG.error('Failed to delete old attachment %s',
old_attachment_id)
else:
LOG.error('Failed to create new attachment')
msg = _('Failed to rebuild volume backed instance.')
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
events = [('volume-reimaged', root_bdm.volume_id)]
# Get the image requested for rebuild
try:
image = self.image_api.get(context, image_id)
except exception.ImageNotFound:
msg = _('Image %s not found.') % image_id
LOG.error(msg)
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
image_size = int(math.ceil(float(image.get('size')) / units.Gi))
deadline = CONF.reimage_timeout_per_gb * image_size
error_cb = self._reimage_failed_callback
# Call cinder to perform reimage operation and wait until an
# external event is triggered.
try:
with self.virtapi.wait_for_instance_event(instance, events,
deadline=deadline,
error_callback=error_cb):
self.volume_api.reimage_volume(
context, root_bdm.volume_id, image_id,
reimage_reserved=True)
except Exception as ex:
LOG.error('Failed to rebuild volume backed instance: %s',
str(ex), instance=instance)
msg = _('Failed to rebuild volume backed instance.')
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
def _rebuild_default_impl(
self, context, instance, image_meta, injected_files,
admin_password, allocations, bdms, detach_block_devices,
attach_block_devices, network_info=None, evacuate=False,
block_device_info=None, preserve_ephemeral=False,
accel_uuids=None):
accel_uuids=None, reimage_boot_volume=False):
if preserve_ephemeral:
# The default code path does not support preserving ephemeral
# partitions.
raise exception.PreserveEphemeralNotSupported()
accel_info = []
detach_root_bdm = not reimage_boot_volume
if evacuate:
if instance.flavor.extra_specs.get('accel:device_profile'):
try:
@ -3421,13 +3528,36 @@ class ComputeManager(manager.Manager):
msg = _('Failure getting accelerator resources.')
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
detach_block_devices(context, bdms)
detach_block_devices(context, bdms,
detach_root_bdm=detach_root_bdm)
else:
self._power_off_instance(instance, clean_shutdown=True)
detach_block_devices(context, bdms)
self.driver.destroy(context, instance,
network_info=network_info,
block_device_info=block_device_info)
detach_block_devices(context, bdms,
detach_root_bdm=detach_root_bdm)
if reimage_boot_volume:
# Previously, the calls reaching here were for image backed
# instance rebuilds and didn't have a root bdm, so now we
# need to handle the case of a root bdm.
# For the root BDM, we do the attach/detach operations
# manually because we want cinder to keep the volume in a
# 'reserved' state throughout the reimage process, so we
# exclude the root BDM from certain operations here,
# i.e. deleting its mapping before the destroy call.
block_device_info_copy = copy.deepcopy(block_device_info)
root_bdm = compute_utils.get_root_bdm(context, instance, bdms)
mapping = block_device_info_copy["block_device_mapping"]
# drop root bdm from the mapping
mapping = [
bdm for bdm in mapping
if bdm["volume_id"] != root_bdm.volume_id
]
self.driver.destroy(context, instance,
network_info=network_info,
block_device_info=block_device_info_copy)
else:
self.driver.destroy(context, instance,
network_info=network_info,
block_device_info=block_device_info)
try:
accel_info = self._get_accel_info(context, instance)
except Exception as exc:
@ -3436,6 +3566,12 @@ class ComputeManager(manager.Manager):
msg = _('Failure getting accelerator resources.')
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
if reimage_boot_volume:
is_volume_backed = compute_utils.is_volume_backed_instance(
context, instance, bdms)
if is_volume_backed:
self._rebuild_volume_backed_instance(
context, instance, bdms, image_meta.id)
instance.task_state = task_states.REBUILD_BLOCK_DEVICE_MAPPING
instance.save(expected_task_state=[task_states.REBUILDING])
@ -3470,7 +3606,8 @@ class ComputeManager(manager.Manager):
injected_files, new_pass, orig_sys_metadata,
bdms, recreate, on_shared_storage,
preserve_ephemeral, migration,
scheduled_node, limits, request_spec, accel_uuids):
scheduled_node, limits, request_spec, accel_uuids,
reimage_boot_volume):
"""Destroy and re-make this instance.
A 'rebuild' effectively purges all existing data from the system and
@ -3502,6 +3639,9 @@ class ComputeManager(manager.Manager):
specified by the user, this will be None
:param request_spec: a RequestSpec object used to schedule the instance
:param accel_uuids: a list of cyborg ARQ uuids
:param reimage_boot_volume: Boolean to specify whether the user has
explicitly requested to rebuild a boot
volume
"""
# recreate=True means the instance is being evacuated from a failed
@ -3566,7 +3706,7 @@ class ComputeManager(manager.Manager):
image_meta, injected_files, new_pass, orig_sys_metadata,
bdms, evacuate, on_shared_storage, preserve_ephemeral,
migration, request_spec, allocs, rebuild_claim,
scheduled_node, limits, accel_uuids)
scheduled_node, limits, accel_uuids, reimage_boot_volume)
except (exception.ComputeResourcesUnavailable,
exception.RescheduledException) as e:
if isinstance(e, exception.ComputeResourcesUnavailable):
@ -3625,7 +3765,8 @@ class ComputeManager(manager.Manager):
self, context, instance, orig_image_ref, image_meta,
injected_files, new_pass, orig_sys_metadata, bdms, evacuate,
on_shared_storage, preserve_ephemeral, migration, request_spec,
allocations, rebuild_claim, scheduled_node, limits, accel_uuids):
allocations, rebuild_claim, scheduled_node, limits, accel_uuids,
reimage_boot_volume):
"""Helper to avoid deep nesting in the top-level method."""
provider_mapping = None
@ -3647,7 +3788,7 @@ class ComputeManager(manager.Manager):
context, instance, orig_image_ref, image_meta, injected_files,
new_pass, orig_sys_metadata, bdms, evacuate, on_shared_storage,
preserve_ephemeral, migration, request_spec, allocations,
provider_mapping, accel_uuids)
provider_mapping, accel_uuids, reimage_boot_volume)
@staticmethod
def _get_image_name(image_meta):
@ -3661,7 +3802,7 @@ class ComputeManager(manager.Manager):
injected_files, new_pass, orig_sys_metadata, bdms, evacuate,
on_shared_storage, preserve_ephemeral, migration, request_spec,
allocations, request_group_resource_providers_mapping,
accel_uuids):
accel_uuids, reimage_boot_volume):
orig_vm_state = instance.vm_state
if evacuate:
@ -3766,8 +3907,23 @@ class ComputeManager(manager.Manager):
self._get_instance_block_device_info(
context, instance, bdms=bdms)
def detach_block_devices(context, bdms):
def detach_block_devices(context, bdms, detach_root_bdm=True):
for bdm in bdms:
# Previously, the calls made to this method by the rebuild
# instance operation were for image backed instances, which
# assumed we only had attached volumes and no root BDM.
# Now we need to handle the root BDM case, which we do
# manually, so we skip the attachment create/delete calls
# from here.
# The detach_root_bdm parameter is only passed while
# rebuilding a volume backed instance, so we don't have
# to worry about other callers as they won't satisfy this
# condition.
# In the evacuate case detach_root_bdm is always True,
# since the reimage_boot_volume parameter does not apply
# there, so this branch is never taken.
if not detach_root_bdm and bdm.is_root:
continue
if bdm.is_volume:
# NOTE (ildikov): Having the attachment_id set in the BDM
# means that it's the new Cinder attach/detach flow
@ -3803,7 +3959,8 @@ class ComputeManager(manager.Manager):
network_info=network_info,
preserve_ephemeral=preserve_ephemeral,
evacuate=evacuate,
accel_uuids=accel_uuids)
accel_uuids=accel_uuids,
reimage_boot_volume=reimage_boot_volume)
try:
with instance.mutated_migration_context():
self.driver.rebuild(**kwargs)
@ -11082,7 +11239,7 @@ class _ComputeV5Proxy(object):
bdms, recreate, on_shared_storage,
preserve_ephemeral, migration,
scheduled_node, limits, request_spec,
accel_uuids)
accel_uuids, False)
# 5.13 support for optional accel_uuids argument
def shelve_instance(self, context, instance, image_id,


@ -402,6 +402,7 @@ class ComputeAPI(object):
* ... - Rename the instance_type argument of prep_resize() to flavor
* ... - Rename the instance_type argument of resize_instance() to
flavor
* 6.1 - Add reimage_boot_volume parameter to rebuild_instance()
'''
VERSION_ALIASES = {
@ -1080,7 +1081,8 @@ class ComputeAPI(object):
self, ctxt, instance, new_pass, injected_files,
image_ref, orig_image_ref, orig_sys_metadata, bdms,
recreate, on_shared_storage, host, node,
preserve_ephemeral, migration, limits, request_spec, accel_uuids):
preserve_ephemeral, migration, limits, request_spec, accel_uuids,
reimage_boot_volume):
# NOTE(edleafe): compute nodes can only use the dict form of limits.
if isinstance(limits, objects.SchedulerLimits):
@ -1092,10 +1094,20 @@ class ComputeAPI(object):
'scheduled_node': node,
'limits': limits,
'request_spec': request_spec,
'accel_uuids': accel_uuids
'accel_uuids': accel_uuids,
'reimage_boot_volume': reimage_boot_volume
}
version = self._ver(ctxt, '5.12')
version = '6.1'
client = self.router.client(ctxt)
if not client.can_send_version(version):
if msg_args['reimage_boot_volume']:
raise exception.NovaException(
'Compute RPC version does not support '
'reimage_boot_volume parameter.')
else:
del msg_args['reimage_boot_volume']
version = self._ver(ctxt, '5.12')
if not client.can_send_version(version):
del msg_args['accel_uuids']
version = '5.0'


@ -1146,7 +1146,7 @@ class ComputeTaskManager:
injected_files, new_pass, orig_sys_metadata,
bdms, recreate, on_shared_storage,
preserve_ephemeral=False, host=None,
request_spec=None):
request_spec=None, reimage_boot_volume=False):
# recreate=True means the instance is being evacuated from a failed
# host to a new destination host. The 'recreate' variable name is
# confusing, so rename it to evacuate here at the top, which is simpler
@ -1343,7 +1343,8 @@ class ComputeTaskManager:
node=node,
limits=limits,
request_spec=request_spec,
accel_uuids=accel_uuids)
accel_uuids=accel_uuids,
reimage_boot_volume=False)
def _validate_image_traits_for_rebuild(self, context, instance, image_ref):
"""Validates that the traits specified in the image can be satisfied


@ -305,6 +305,21 @@ Related options:
agent disabled. When used with libvirt the instance mode should be
configured as HVM.
"""),
cfg.IntOpt('reimage_timeout_per_gb',
default=20,
min=1,
help="""
Timeout for reimaging a volume.
Number of seconds to wait for volume-reimaged events to arrive before
continuing or failing.
This is a per-gigabyte timeout with a default value of 20 seconds, which is
multiplied by the size of the image in GB. E.g. an image of 6 GB will have
a timeout of 20 * 6 = 120 seconds.
Try increasing the timeout if the image copy per GB takes more time and you
are hitting timeout failures.
"""),
]
resource_tracker_opts = [
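
As a quick, hypothetical illustration of the formula described in the help
text above (it mirrors the math.ceil(size / units.Gi) computation added to
the compute manager; the 6 GiB image size is made up):

import math

GiB = 1024 ** 3
reimage_timeout_per_gb = 20          # the option's default value
image_size_bytes = 6 * GiB           # example: a 6 GiB image from glance
image_size_gb = int(math.ceil(float(image_size_bytes) / GiB))
deadline = reimage_timeout_per_gb * image_size_gb
# deadline == 120 seconds to wait for the volume-reimaged event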


@ -2477,3 +2477,7 @@ class PlacementPciMixedTraitsException(PlacementPciException):
"of 'traits' in [pci]device_spec. We got %(new_traits)s for "
"%(new_dev)s and %(current_traits)s for %(current_devs)s."
)
class ReimageException(NovaException):
msg_fmt = _("Reimaging volume failed.")


@ -33,6 +33,9 @@ EVENT_NAMES = [
# Accelerator Request got bound, tag is ARQ uuid.
# Sent when an ARQ for an instance has been bound or failed to bind.
'accelerator-request-bound',
# The re-image operation has completed on the cinder side
'volume-reimaged',
]
EVENT_STATUSES = ['failed', 'completed', 'in-progress']
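
For context, cinder is expected to deliver this event through nova's
existing external events API (the same mechanism used for events such as
volume-extended). A rough, hypothetical payload, with the tag set to the
root volume ID the compute manager waits on, could look like:

# Illustrative payload for POST /os-server-external-events; the field names
# follow nova's external events API, the UUIDs below are placeholders.
payload = {
    "events": [{
        "name": "volume-reimaged",
        "server_uuid": "<uuid of the instance being rebuilt>",
        "tag": "<uuid of the root volume that was reimaged>",
        # 'completed' unblocks the wait; 'failed' triggers
        # _reimage_failed_callback and aborts the rebuild.
        "status": "completed",
    }]
}
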
@ -50,7 +53,8 @@ class InstanceExternalEvent(obj_base.NovaObject):
# Version 1.2: adds volume-extended event
# Version 1.3: adds power-update event
# Version 1.4: adds accelerator-request-bound event
VERSION = '1.4'
# Version 1.5: adds volume-reimaged event
VERSION = '1.5'
fields = {
'instance_uuid': fields.UUIDField(),


@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 63
SERVICE_VERSION = 64
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -222,6 +222,9 @@ SERVICE_VERSION_HISTORY = (
# Version 63: Compute RPC v6.0:
# Add support for VDPA hotplug live migration and suspend/resume
{'compute_rpc': '6.0'},
# Version 64: Compute RPC v6.1:
# Add reimage_boot_volume parameter to rebuild_instance()
{'compute_rpc': '6.1'},
)
# This is used to raise an error at service startup if older than N-1 computes


@ -2730,7 +2730,8 @@ class ComputeTestCase(BaseTestCase,
new_pass="new_password", orig_sys_metadata=sys_metadata,
bdms=[], recreate=False, on_shared_storage=False,
preserve_ephemeral=False, migration=None, scheduled_node=None,
limits={}, request_spec=None, accel_uuids=[])
limits={}, request_spec=None, accel_uuids=[],
reimage_boot_volume=False)
self.compute.terminate_instance(self.context, instance, [])
def test_rebuild_driver(self):
@ -2760,7 +2761,8 @@ class ComputeTestCase(BaseTestCase,
new_pass="new_password", orig_sys_metadata=sys_metadata,
bdms=[], recreate=False, on_shared_storage=False,
preserve_ephemeral=False, migration=None, scheduled_node=None,
limits={}, request_spec=None, accel_uuids=[])
limits={}, request_spec=None, accel_uuids=[],
reimage_boot_volume=False)
self.assertTrue(called['rebuild'])
self.compute.terminate_instance(self.context, instance, [])
@ -2812,7 +2814,8 @@ class ComputeTestCase(BaseTestCase,
new_pass="new_password", orig_sys_metadata=sys_metadata,
bdms=bdms, recreate=False, preserve_ephemeral=False,
migration=None, scheduled_node=None, limits={},
on_shared_storage=False, request_spec=None, accel_uuids=[])
on_shared_storage=False, request_spec=None, accel_uuids=[],
reimage_boot_volume=False)
self.assertTrue(called['rebuild'])
self.compute.terminate_instance(self.context, instance, [])
@ -2831,7 +2834,7 @@ class ComputeTestCase(BaseTestCase,
new_pass="new_password", orig_sys_metadata=sys_metadata, bdms=[],
recreate=False, on_shared_storage=False, preserve_ephemeral=False,
migration=None, scheduled_node=None, limits=None,
request_spec=None, accel_uuids=[])
request_spec=None, accel_uuids=[], reimage_boot_volume=False)
self.compute.terminate_instance(self.context, instance, [])
def test_rebuild_launched_at_time(self):
@ -2852,7 +2855,7 @@ class ComputeTestCase(BaseTestCase,
new_pass="new_password", orig_sys_metadata={}, bdms=[],
recreate=False, on_shared_storage=False, preserve_ephemeral=False,
migration=None, scheduled_node=None, limits={}, request_spec=None,
accel_uuids=[])
accel_uuids=[], reimage_boot_volume=False)
instance.refresh()
self.assertEqual(cur_time,
instance['launched_at'].replace(tzinfo=None))
@ -2885,7 +2888,8 @@ class ComputeTestCase(BaseTestCase,
injected_files=injected_files, new_pass="new_password",
orig_sys_metadata=sys_metadata, bdms=[], recreate=False,
on_shared_storage=False, preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={}, request_spec=None, accel_uuids=[])
scheduled_node=None, limits={}, request_spec=None, accel_uuids=[],
reimage_boot_volume=False)
self.compute.terminate_instance(self.context, instance, [])
@mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid')
@ -4612,7 +4616,8 @@ class ComputeTestCase(BaseTestCase,
'limits': {},
'request_spec': None,
'on_shared_storage': False,
'accel_uuids': ()}),
'accel_uuids': (),
'reimage_boot_volume': False}),
("set_admin_password", task_states.UPDATING_PASSWORD,
{'new_pass': None}),
("rescue_instance", task_states.RESCUING,
@ -5130,7 +5135,8 @@ class ComputeTestCase(BaseTestCase,
injected_files=[], new_pass=password,
orig_sys_metadata=orig_sys_metadata, bdms=[], recreate=False,
on_shared_storage=False, preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={}, request_spec=None, accel_uuids=[])
scheduled_node=None, limits={}, request_spec=None, accel_uuids=[],
reimage_boot_volume=False)
inst_ref.refresh()
@ -13534,7 +13540,7 @@ class EvacuateHostTestCase(BaseTestCase):
image_ref, injected_files, 'newpass', {}, bdms, recreate=True,
on_shared_storage=on_shared_storage, migration=migration,
preserve_ephemeral=False, scheduled_node=node, limits=limits,
request_spec=None, accel_uuids=[])
request_spec=None, accel_uuids=[], reimage_boot_volume=False)
if vm_states_is_stopped:
mock_notify_rebuild.assert_has_calls([
mock.call(ctxt, self.inst, self.inst.host, phase='start',

View File

@ -5305,7 +5305,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self.compute.rebuild_instance(
self.context, instance, None, None, None, None, None, None,
recreate, False, False, None, scheduled_node, {}, None, [])
recreate, False, False, None, scheduled_node, {}, None, [], False)
mock_set.assert_called_once_with(None, 'failed')
mock_notify_about_instance_usage.assert_called_once_with(
mock.ANY, instance, 'rebuild.error', fault=mock_rebuild.side_effect
@ -5416,7 +5416,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
None, recreate=True, on_shared_storage=None,
preserve_ephemeral=False, migration=None,
scheduled_node='fake-node',
limits={}, request_spec=request_spec, accel_uuids=[])
limits={}, request_spec=request_spec, accel_uuids=[],
reimage_boot_volume=False)
mock_validate_policy.assert_called_once_with(
elevated_context, instance, {'group': [uuids.group]})
@ -5455,7 +5456,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self.context, instance, None, None, None, None, None, None,
recreate=True, on_shared_storage=None, preserve_ephemeral=False,
migration=None, scheduled_node='fake-node', limits={},
request_spec=request_spec, accel_uuids=[])
request_spec=request_spec, accel_uuids=[],
reimage_boot_volume=False)
mock_validate_policy.assert_called_once_with(
elevated_context, instance, {'group': [uuids.group]})
@ -5481,7 +5483,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self.compute.rebuild_instance(
self.context, instance, None, None,
None, None, None, None, False,
False, False, migration, None, {}, None, [])
False, False, migration, None, {}, None, [], False)
self.assertFalse(mock_get.called)
self.assertEqual(node, instance.node)
self.assertEqual('done', migration.status)
@ -5503,7 +5505,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self.compute.rebuild_instance(
self.context, instance, None, None, None, None, None,
None, True, False, False, mock.sentinel.migration, None, {},
None, [])
None, [], False)
mock_get.assert_called_once_with(mock.ANY, self.compute.host)
mock_rt.finish_evacuation.assert_called_once_with(
instance, 'new-node', mock.sentinel.migration)
@ -5585,7 +5587,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
recreate, on_shared_storage,
preserve_ephemeral, {}, {},
self.allocations,
mock.sentinel.mapping, [])
mock.sentinel.mapping, [],
False)
mock_notify_usage.assert_has_calls(
[mock.call(self.context, instance, "rebuild.start",
@ -5603,8 +5606,12 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
provider_mappings=mock.sentinel.mapping)
mock_get_nw_info.assert_called_once_with(self.context, instance)
def test_rebuild_default_impl(self):
def _detach(context, bdms):
@ddt.data((False, False), (False, True), (True, False), (True, True))
@ddt.unpack
def test_rebuild_default_impl(self, is_vol_backed, reimage_boot_vol):
fake_image_meta = mock.MagicMock(id='fake_id')
def _detach(context, bdms, detach_root_bdm=True):
# NOTE(rpodolyaka): check that instance has been powered off by
# the time we detach block devices, exact calls arguments will be
# checked below
@ -5630,13 +5637,20 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock.patch.object(self.compute, '_power_off_instance',
return_value=None),
mock.patch.object(self.compute, '_get_accel_info',
return_value=[])
return_value=[]),
mock.patch.object(compute_utils, 'is_volume_backed_instance',
return_value=is_vol_backed),
mock.patch.object(self.compute, '_rebuild_volume_backed_instance'),
mock.patch.object(compute_utils, 'get_root_bdm')
) as (
mock_destroy,
mock_spawn,
mock_save,
mock_power_off,
mock_accel_info
mock_accel_info,
mock_is_volume_backed,
mock_rebuild_vol_backed_inst,
mock_get_root,
):
instance = fake_instance.fake_instance_obj(self.context)
instance.migration_context = None
@ -5646,9 +5660,19 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
instance.device_metadata = None
instance.task_state = task_states.REBUILDING
instance.save(expected_task_state=[task_states.REBUILDING])
fake_block_device_info = {
'block_device_mapping': [
{'attachment_id': '341a8917-f74d-4473-8ee7-4ca05e5e0ab3',
'volume_id': 'b7c93bb9-dfe4-41af-aa56-e6b28342fd8f',
'connection_info': {'driver_volume_type': 'iscsi',
'data': {'target_discovered': False,
'target_portal': '127.0.0.1:3260',
'target_iqn': 'iqn.2010-10.org.openstack:volume-'
'b7c93bb9-dfe4-41af-aa56-e6b28342fd8f',
'target_lun': 0}}}]}
self.compute._rebuild_default_impl(self.context,
instance,
None,
fake_image_meta,
[],
admin_password='new_pass',
bdms=[],
@ -5657,16 +5681,151 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
attach_block_devices=_attach,
network_info=None,
evacuate=False,
block_device_info=None,
preserve_ephemeral=False)
block_device_info=
fake_block_device_info,
preserve_ephemeral=False,
reimage_boot_volume=
reimage_boot_vol)
self.assertTrue(mock_save.called)
self.assertTrue(mock_spawn.called)
mock_destroy.assert_called_once_with(
self.context, instance,
network_info=None, block_device_info=None)
network_info=None, block_device_info=fake_block_device_info)
mock_power_off.assert_called_once_with(
instance, clean_shutdown=True)
if is_vol_backed and reimage_boot_vol:
mock_rebuild_vol_backed_inst.assert_called_once_with(
self.context, instance, [], fake_image_meta.id)
else:
mock_rebuild_vol_backed_inst.assert_not_called()
@mock.patch('nova.volume.cinder.API.attachment_delete')
@mock.patch('nova.volume.cinder.API.attachment_create',
return_value={'id': uuids.new_attachment_id})
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
'wait_for_instance_event')
def test__rebuild_volume_backed_instance(
self, wait_inst_event, attach_create, attach_delete):
fake_conn_info = '{}'
fake_device = 'fake_vda'
root_bdm = mock.MagicMock(
volume_id=uuids.volume_id, connection_info=fake_conn_info,
device_name=fake_device, attachment_id=uuids.old_attachment_id,
save=mock.MagicMock())
bdms = [root_bdm]
events = [('volume-reimaged', root_bdm.volume_id)]
image_size_gb = 1
deadline = CONF.reimage_timeout_per_gb * image_size_gb
with test.nested(
mock.patch.object(objects.Instance, 'save',
return_value=None),
mock.patch.object(compute_utils, 'get_root_bdm',
return_value=root_bdm),
mock.patch.object(self.compute, 'volume_api'),
mock.patch.object(self.compute.image_api, 'get'),
) as (
mock_save,
mock_get_root_bdm,
mock_vol_api,
mock_get_img
):
instance = fake_instance.fake_instance_obj(self.context)
instance.task_state = task_states.REBUILDING
# 1024 ** 3 = 1073741824
mock_get_img.return_value = {'size': 1073741824}
self.compute._rebuild_volume_backed_instance(
self.context, instance, bdms, uuids.image_id)
mock_vol_api.attachment_create.assert_called_once_with(
self.context, uuids.volume_id, instance.uuid)
mock_vol_api.attachment_delete.assert_called_once_with(
self.context, uuids.old_attachment_id)
mock_vol_api.reimage_volume.assert_called_once_with(
self.context, uuids.volume_id, uuids.image_id,
reimage_reserved=True)
mock_get_img.assert_called_once_with(
self.context, uuids.image_id)
mock_get_root_bdm.assert_called_once_with(
self.context, instance, bdms)
wait_inst_event.assert_called_once_with(
instance, events, deadline=deadline,
error_callback=self.compute._reimage_failed_callback)
@mock.patch('nova.volume.cinder.API.attachment_delete')
@mock.patch('nova.volume.cinder.API.attachment_create',
return_value={'id': uuids.new_attachment_id})
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
'wait_for_instance_event')
def test__rebuild_volume_backed_instance_image_not_found(
self, wait_inst_event, attach_create, attach_delete):
fake_conn_info = '{}'
fake_device = 'fake_vda'
root_bdm = mock.MagicMock(
volume_id=uuids.volume_id, connection_info=fake_conn_info,
device_name=fake_device, attachment_id=uuids.old_attachment_id,
save=mock.MagicMock())
bdms = [root_bdm]
with test.nested(
mock.patch.object(objects.Instance, 'save',
return_value=None),
mock.patch.object(compute_utils, 'get_root_bdm',
return_value=root_bdm),
mock.patch.object(self.compute, 'volume_api'),
mock.patch.object(self.compute.image_api, 'get'),
) as (
mock_save,
mock_get_root_bdm,
mock_vol_api,
mock_get_img
):
mock_get_img.side_effect = exception.ImageNotFound(
image_id=uuids.image_id)
instance = fake_instance.fake_instance_obj(self.context)
instance.task_state = task_states.REBUILDING
instance.save(expected_task_state=[task_states.REBUILDING])
mock_get_img.return_value = {'size': 1}
self.assertRaises(
exception.BuildAbortException,
self.compute._rebuild_volume_backed_instance,
self.context, instance, bdms, uuids.image_id)
mock_vol_api.attachment_create.assert_called_once_with(
self.context, uuids.volume_id, instance.uuid)
mock_vol_api.attachment_delete.assert_called_once_with(
self.context, uuids.old_attachment_id)
mock_get_img.assert_called_once_with(
self.context, uuids.image_id)
@mock.patch.object(objects.Instance, 'save', return_value=None)
@mock.patch.object(fake_driver.SmallFakeDriver, 'detach_volume')
@mock.patch.object(cinder.API, 'roll_detaching')
def test__detach_root_volume(self, mock_roll_detach, mock_detach,
mock_save):
exception_list = [
'',
exception.DiskNotFound(location="not\\here"),
exception.DeviceDetachFailed(device="fake_dev", reason="unknown"),
]
mock_detach.side_effect = exception_list
fake_conn_info = '{}'
fake_device = 'fake_vda'
root_bdm = mock.MagicMock(
volume_id=uuids.volume_id, connection_info=fake_conn_info,
device_name=fake_device, attachment_id=uuids.old_attachment_id,
save=mock.MagicMock())
instance = fake_instance.fake_instance_obj(self.context)
instance.task_state = task_states.REBUILDING
instance.save(expected_task_state=[task_states.REBUILDING])
self.compute._detach_root_volume(self.context, instance, root_bdm)
self.compute._detach_root_volume(self.context, instance, root_bdm)
self.assertRaises(exception.DeviceDetachFailed,
self.compute._detach_root_volume,
self.context, instance, root_bdm)
mock_roll_detach.assert_called_with(self.context, uuids.volume_id)
self.assertRaises(Exception, self.compute._detach_root_volume, # noqa
self.context, instance, root_bdm)
mock_roll_detach.assert_called_with(self.context, uuids.volume_id)
def test_do_rebuild_instance_check_trusted_certs(self):
"""Tests the scenario that we're rebuilding an instance with
@ -5688,7 +5847,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
request_spec=objects.RequestSpec(),
allocations=self.allocations,
request_group_resource_providers_mapping=mock.sentinel.mapping,
accel_uuids=[])
accel_uuids=[], reimage_boot_volume=False)
self.assertIn('Trusted image certificates provided on host', str(ex))
def test_reverts_task_state_instance_not_found(self):


@ -835,7 +835,8 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
bdms=[], instance=self.fake_instance_obj, host='new_host',
orig_sys_metadata=None, recreate=True, on_shared_storage=True,
preserve_ephemeral=True, migration=None, node=None,
limits=None, request_spec=None, accel_uuids=[], version='6.0')
limits=None, request_spec=None, accel_uuids=[],
reimage_boot_volume=False, version='6.1')
def test_rebuild_instance_old_rpcapi(self):
# With rpcapi < 5.12, accel_uuids must be dropped in the client call.
@ -862,20 +863,58 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
'migration': None,
'limits': None
}
# Pass reimage_boot_volume to the client call...
compute_api.rebuild_instance(
ctxt, instance=self.fake_instance_obj,
accel_uuids=['938af7f9-f136-4e5a-bdbe-3b6feab54311'],
node=None, host=None, **rebuild_args)
node=None, host=None, reimage_boot_volume=False,
**rebuild_args)
mock_client.can_send_version.assert_has_calls([mock.call('6.0'),
mock.call('5.12')])
mock_client.prepare.assert_called_with(
server=self.fake_instance_obj.host, version='5.0')
# ...and assert that it does not show up on the wire before 6.1
mock_cctx.cast.assert_called_with( # No accel_uuids
ctxt, 'rebuild_instance',
instance=self.fake_instance_obj,
scheduled_node=None, **rebuild_args)
def test_rebuild_instance_vol_backed_old_rpcapi(self):
# With rpcapi < 6.1, if reimage_boot_volume is True then we
# should raise error.
ctxt = context.RequestContext('fake_user', 'fake_project')
compute_api = compute_rpcapi.ComputeAPI()
compute_api.router.client = mock.Mock()
mock_client = mock.MagicMock()
compute_api.router.client.return_value = mock_client
# Force can_send_version to [False, True, True], so that the
# 6.1 version cannot be used (computes are pinned at 6.0).
mock_client.can_send_version.side_effect = [False, True, True]
mock_cctx = mock.MagicMock()
mock_client.prepare.return_value = mock_cctx
rebuild_args = {
'new_pass': 'admin_password',
'injected_files': 'files_to_inject',
'image_ref': uuids.image_ref,
'orig_image_ref': uuids.orig_image_ref,
'orig_sys_metadata': 'orig_sys_meta',
'bdms': {},
'recreate': False,
'on_shared_storage': False,
'preserve_ephemeral': False,
'request_spec': None,
'migration': None,
'limits': None,
'accel_uuids': [],
'reimage_boot_volume': True,
}
self.assertRaises(
exception.NovaException, compute_api.rebuild_instance,
ctxt, instance=self.fake_instance_obj,
node=None, host=None, **rebuild_args)
mock_client.can_send_version.assert_has_calls([mock.call('6.1')])
def test_reserve_block_device_name(self):
self.flags(long_rpc_timeout=1234)
self._test_compute_api('reserve_block_device_name', 'call',


@ -388,7 +388,8 @@ class _BaseTaskTestCase(object):
'on_shared_storage': False,
'preserve_ephemeral': False,
'host': 'compute-host',
'request_spec': None}
'request_spec': None,
'reimage_boot_volume': False}
if update_args:
rebuild_args.update(update_args)
compute_rebuild_args = copy.deepcopy(rebuild_args)
@ -1927,6 +1928,8 @@ class _BaseTaskTestCase(object):
migration_type='evacuation')
migration.create()
# TODO(whoami-rajat): Remove this compatibility code
del rebuild_args['reimage_boot_volume']
self.assertRaises(exc.UnsupportedPolicyException,
self.conductor.rebuild_instance,
self.context,


@ -1079,7 +1079,7 @@ object_data = {
'InstanceActionEventList': '1.1-13d92fb953030cdbfee56481756e02be',
'InstanceActionList': '1.1-a2b2fb6006b47c27076d3a1d48baa759',
'InstanceDeviceMetadata': '1.0-74d78dd36aa32d26d2769a1b57caf186',
'InstanceExternalEvent': '1.4-06c2dfcf2d2813c24cd37ee728524f1a',
'InstanceExternalEvent': '1.5-1ec57351a9851c1eb43ccd90662d6dd0',
'InstanceFault': '1.2-7ef01f16f1084ad1304a513d6d410a38',
'InstanceFaultList': '1.2-6bb72de2872fe49ded5eb937a93f2451',
'InstanceGroup': '1.11-852ac511d30913ee88f3c3a869a8f30a',


@ -333,7 +333,8 @@ class ComputeDriver(object):
admin_password, allocations, bdms, detach_block_devices,
attach_block_devices, network_info=None,
evacuate=False, block_device_info=None,
preserve_ephemeral=False, accel_uuids=None):
preserve_ephemeral=False, accel_uuids=None,
reimage_boot_volume=False):
"""Destroy and re-make this instance.
A 'rebuild' effectively purges all existing data from the system and
@ -371,6 +372,7 @@ class ComputeDriver(object):
:param preserve_ephemeral: True if the default ephemeral storage
partition must be preserved on rebuild
:param accel_uuids: Accelerator UUIDs.
:param reimage_boot_volume: Re-image the volume backed instance.
"""
raise NotImplementedError()


@ -1630,7 +1630,8 @@ class IronicDriver(virt_driver.ComputeDriver):
admin_password, allocations, bdms, detach_block_devices,
attach_block_devices, network_info=None,
evacuate=False, block_device_info=None,
preserve_ephemeral=False, accel_uuids=None):
preserve_ephemeral=False, accel_uuids=None,
reimage_boot_volume=False):
"""Rebuild/redeploy an instance.
This version of rebuild() allows for supporting the option to
@ -1671,7 +1672,13 @@ class IronicDriver(virt_driver.ComputeDriver):
:param preserve_ephemeral: Boolean value; if True the ephemeral
must be preserved on rebuild.
:param accel_uuids: Accelerator UUIDs. Ignored by this driver.
:param reimage_boot_volume: Re-image the volume backed instance.
"""
if reimage_boot_volume:
raise exception.NovaException(
_("Ironic doesn't support rebuilding volume backed "
"instances."))
LOG.debug('Rebuild called for instance', instance=instance)
instance.task_state = task_states.REBUILD_SPAWNING