libvirt: Don't delete disks on shared storage during evacuate

When evacuating an instance between compute hosts on shared storage,
during the rebuild operation we call spawn() on the destination
compute. spawn() currently assumes that it should cleanup all
resources on failure, which results in user data being deleted in the
evacuate case.

This change modifies spawn in the libvirt driver such that it only
cleans up resources it created.

Co-Authored-By: Lee Yarwood <lyarwood@redhat.com>
Closes-Bug: #1550919
Change-Id: I764481966c96a67d993da6e902dc9fc3ad29ee36
(cherry picked from commit 083df01a4d)
This commit is contained in:
Matthew Booth 2018-08-15 14:39:48 +01:00 committed by Lee Yarwood
parent 90e0e874bd
commit 497360b0ea
5 changed files with 185 additions and 91 deletions

View File

@ -51,7 +51,8 @@ class ServersTestBase(base.ServersTestBase):
self.useFixture(fakelibvirt.FakeLibvirtFixture())
self.useFixture(fixtures.MockPatch(
'nova.virt.libvirt.LibvirtDriver._create_image'))
'nova.virt.libvirt.LibvirtDriver._create_image',
return_value=(False, False)))
self.useFixture(fixtures.MockPatch(
'nova.virt.libvirt.LibvirtDriver._get_local_gb_info',
return_value={'total': 128, 'used': 44, 'free': 84}))

View File

@ -141,10 +141,9 @@ class _FileTest(object):
source_root_disk = os.path.join(
self.source_instance_path(server), disk)
# FIXME(mdbooth): We should not have deleted a shared disk
self.assertFalse(os.path.exists(source_root_disk),
"Source root disk %s for server %s exists" %
(source_root_disk, name))
self.assertTrue(os.path.exists(source_root_disk),
"Source root disk %s for server %s does not exist" %
(source_root_disk, name))
class _FlatTest(_FileTest):
@ -235,8 +234,7 @@ class _RbdTest(object):
# Check that we created a root disk and haven't called _cleanup_rbd at
# all
self.assertIn("%s_%s" % (server['id'], disk), self.created)
# FIXME(mdbooth): we should not have deleted shared disks
self.assertGreater(self.mock_rbd_driver.cleanup_volumes.call_count, 0)
self.mock_rbd_driver.cleanup_volumes.assert_not_called()
# We never want to cleanup rbd disks during evacuate, regardless of
# instance shared storage
@ -633,10 +631,9 @@ class _LibvirtEvacuateTest(integrated_helpers.InstanceHelperMixin):
server = self._evacuate_with_failure(server, compute1)
# Check that the instance directory still exists
# FIXME(mdbooth): the shared instance directory should still exist
self.assertFalse(os.path.exists(shared_instance_path),
"Shared instance directory %s for server %s "
"exists" % (shared_instance_path, name))
self.assertTrue(os.path.exists(shared_instance_path),
"Shared instance directory %s for server %s "
"does not exist" % (shared_instance_path, name))
self.assert_disks_shared_instancedir(server)

View File

@ -82,7 +82,8 @@ class TestSerialConsoleLiveMigrate(test.TestCase):
@mock.patch('os.path.getsize', return_value=1024)
@mock.patch('nova.conductor.tasks.live_migrate.LiveMigrationTask.'
'_check_destination_is_not_source', return_value=False)
@mock.patch('nova.virt.libvirt.LibvirtDriver._create_image')
@mock.patch('nova.virt.libvirt.LibvirtDriver._create_image',
return_value=(False, False))
@mock.patch('nova.virt.libvirt.LibvirtDriver._get_local_gb_info',
return_value={'total': 128,
'used': 44,

View File

@ -6275,7 +6275,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock.patch.object(drvr, '_is_booted_from_volume',
return_value=False),
mock.patch.object(drvr, 'plug_vifs'),
mock.patch.object(drvr, 'cleanup')):
mock.patch.object(drvr, '_cleanup')):
self.assertRaises(ValueError,
drvr._create_domain_and_network,
self.context,
@ -14257,7 +14257,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
def _test_spawn_accels(self, accel_info, mock_get_disk_info):
mock_get_disk_info.return_value = {'mapping': None}
self.stub_out('nova.virt.libvirt.driver.LibvirtDriver.'
'_create_image', lambda *a, **kw: None)
'_create_image', lambda *a, **kw: (False, False))
self.stub_out('nova.virt.libvirt.driver.LibvirtDriver.'
'_ensure_console_log_for_instance',
lambda *a, **kw: None)
@ -14323,7 +14323,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
context, xml, instance, network_info,
block_device_info=None, power_on=True,
vifs_already_plugged=False, post_xml_callback=None,
destroy_disks_on_failure=False):
cleanup_instance_dir=False, cleanup_instance_disks=False):
# The config disk should be created by this callback, so we need
# to execute it.
post_xml_callback()
@ -18709,7 +18709,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock.patch.object(drvr, 'plug_vifs'),
mock.patch.object(drvr, '_create_domain',
side_effect=exception.NovaException),
mock.patch.object(drvr, 'cleanup')):
mock.patch.object(drvr, '_cleanup')):
self.assertRaises(exception.NovaException,
drvr._create_domain_and_network,
self.context,
@ -18774,7 +18774,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
@mock.patch.object(drvr, 'plug_vifs')
@mock.patch.object(drvr, '_create_domain')
@mock.patch.object(drvr, 'cleanup')
@mock.patch.object(drvr, '_cleanup')
def test_create(cleanup, create, plug_vifs):
domain = drvr._create_domain_and_network(self.context, 'xml',
instance, vifs,
@ -18856,34 +18856,31 @@ class LibvirtConnTestCase(test.NoDBTestCase,
def the_test(mock_cleanup, mock_create, mock_plug):
instance = objects.Instance(**self.test_instance)
mock_create.side_effect = test.TestingException
self.assertRaises(test.TestingException,
drvr._create_domain_and_network,
self.context, 'xml', instance, [], None)
mock_cleanup.assert_called_once_with(self.context, instance,
[], None, None, False)
# destroy_disks_on_failure=True, used only by spawn()
mock_cleanup.reset_mock()
self.assertRaises(test.TestingException,
drvr._create_domain_and_network,
self.context, 'xml', instance, [], None,
destroy_disks_on_failure=True)
mock_cleanup.assert_called_once_with(self.context, instance,
[], None, None, True)
self.assertRaises(
test.TestingException, drvr._create_domain_and_network,
self.context, 'xml', instance, [], None,
cleanup_instance_dir=mock.sentinel.cleanup_instance_dir,
cleanup_instance_disks=mock.sentinel.cleanup_instance_disks)
mock_cleanup.assert_called_once_with(
self.context, instance, [], None, None,
cleanup_instance_dir=mock.sentinel.cleanup_instance_dir,
cleanup_instance_disks=mock.sentinel.cleanup_instance_disks)
the_test()
def test_cleanup_failed_start_no_guest(self):
drvr = libvirt_driver.LibvirtDriver(mock.MagicMock(), False)
with mock.patch.object(drvr, 'cleanup') as mock_cleanup:
drvr._cleanup_failed_start(None, None, None, None, None, False)
with mock.patch.object(drvr, '_cleanup') as mock_cleanup:
drvr._cleanup_failed_start(
None, None, None, None, None, False, False)
self.assertTrue(mock_cleanup.called)
def test_cleanup_failed_start_inactive_guest(self):
drvr = libvirt_driver.LibvirtDriver(mock.MagicMock(), False)
guest = mock.MagicMock()
guest.is_active.return_value = False
with mock.patch.object(drvr, 'cleanup') as mock_cleanup:
drvr._cleanup_failed_start(None, None, None, None, guest, False)
with mock.patch.object(drvr, '_cleanup') as mock_cleanup:
drvr._cleanup_failed_start(
None, None, None, None, guest, False, False)
self.assertTrue(mock_cleanup.called)
self.assertFalse(guest.poweroff.called)
@ -18891,8 +18888,9 @@ class LibvirtConnTestCase(test.NoDBTestCase,
drvr = libvirt_driver.LibvirtDriver(mock.MagicMock(), False)
guest = mock.MagicMock()
guest.is_active.return_value = True
with mock.patch.object(drvr, 'cleanup') as mock_cleanup:
drvr._cleanup_failed_start(None, None, None, None, guest, False)
with mock.patch.object(drvr, '_cleanup') as mock_cleanup:
drvr._cleanup_failed_start(
None, None, None, None, guest, False, False)
self.assertTrue(mock_cleanup.called)
self.assertTrue(guest.poweroff.called)
@ -18901,10 +18899,10 @@ class LibvirtConnTestCase(test.NoDBTestCase,
guest = mock.MagicMock()
guest.is_active.return_value = True
guest.poweroff.side_effect = test.TestingException
with mock.patch.object(drvr, 'cleanup') as mock_cleanup:
with mock.patch.object(drvr, '_cleanup') as mock_cleanup:
self.assertRaises(test.TestingException,
drvr._cleanup_failed_start,
None, None, None, None, guest, False)
None, None, None, None, guest, False, False)
self.assertTrue(mock_cleanup.called)
self.assertTrue(guest.poweroff.called)
@ -18913,12 +18911,16 @@ class LibvirtConnTestCase(test.NoDBTestCase,
guest = mock.MagicMock()
guest.is_active.return_value = True
guest.poweroff.side_effect = test.TestingException
with mock.patch.object(drvr, 'cleanup') as mock_cleanup:
self.assertRaises(test.TestingException,
drvr._cleanup_failed_start,
None, None, None, None, guest, True)
mock_cleanup.assert_called_once_with(None, None, network_info=None,
block_device_info=None, destroy_disks=True)
with mock.patch.object(drvr, '_cleanup') as mock_cleanup:
self.assertRaises(
test.TestingException, drvr._cleanup_failed_start,
None, None, None, None, guest,
cleanup_instance_dir=mock.sentinel.cleanup_instance_dir,
cleanup_instance_disks=mock.sentinel.cleanup_instance_disks)
mock_cleanup.assert_called_once_with(
None, None, None, block_device_info=None, destroy_vifs=True,
cleanup_instance_dir=mock.sentinel.cleanup_instance_dir,
cleanup_instance_disks=mock.sentinel.cleanup_instance_disks)
self.assertTrue(guest.poweroff.called)
@mock.patch('os_brick.encryptors.get_encryption_metadata')

View File

@ -1303,6 +1303,71 @@ class LibvirtDriver(driver.ComputeDriver):
def cleanup(self, context, instance, network_info, block_device_info=None,
destroy_disks=True, migrate_data=None, destroy_vifs=True):
"""Cleanup the instance from the host.
Identify if the instance disks and instance path should be removed
from the host before calling down into the _cleanup method for the
actual removal of resources from the host.
:param context: security context
:param instance: instance object for the instance being cleaned up
:param network_info: instance network information
:param block_device_info: optional instance block device information
:param destroy_disks: if local ephemeral disks should be destroyed
:param migrate_data: optional migrate_data object
:param destroy_vifs: if plugged vifs should be unplugged
"""
cleanup_instance_dir = False
cleanup_instance_disks = False
# We assume destroy_disks means destroy instance directory and disks
if destroy_disks:
cleanup_instance_dir = True
cleanup_instance_disks = True
else:
# NOTE(mdbooth): I think the theory here was that if this is a
# migration with shared block storage then we need to delete the
# instance directory because that's not shared. I'm pretty sure
# this is wrong.
if migrate_data and 'is_shared_block_storage' in migrate_data:
cleanup_instance_dir = migrate_data.is_shared_block_storage
# NOTE(lyarwood): The following workaround allows operators to
# ensure that non-shared instance directories are removed after an
# evacuation or revert resize when using the shared RBD
# imagebackend. This workaround is not required when cleaning up
# migrations that provide migrate_data to this method as the
# existing is_shared_block_storage conditional will cause the
# instance directory to be removed.
if not cleanup_instance_dir:
if CONF.workarounds.ensure_libvirt_rbd_instance_dir_cleanup:
cleanup_instance_dir = CONF.libvirt.images_type == 'rbd'
return self._cleanup(
context, instance, network_info,
block_device_info=block_device_info,
destroy_vifs=destroy_vifs,
cleanup_instance_dir=cleanup_instance_dir,
cleanup_instance_disks=cleanup_instance_disks)
def _cleanup(self, context, instance, network_info, block_device_info=None,
destroy_vifs=True, cleanup_instance_dir=False,
cleanup_instance_disks=False):
"""Cleanup the domain and any attached resources from the host.
This method cleans up any pmem devices, unplugs VIFs, disconnects
attached volumes and undefines the instance domain within libvirt.
It also optionally removes the ephemeral disks and the instance
directory from the host depending on the cleanup_instance_dir|disks
kwargs provided.
:param context: security context
:param instance: instance object for the instance being cleaned up
:param network_info: instance network information
:param block_device_info: optional instance block device information
:param destroy_vifs: if plugged vifs should be unplugged
:param cleanup_instance_dir: If the instance dir should be removed
:param cleanup_instance_disks: If the instance disks should be removed
"""
# zero the data on backend pmem device
vpmems = self._get_vpmems(instance)
if vpmems:
@ -1333,7 +1398,7 @@ class LibvirtDriver(driver.ComputeDriver):
self._disconnect_volume(context, connection_info, instance)
except Exception as exc:
with excutils.save_and_reraise_exception() as ctxt:
if destroy_disks:
if cleanup_instance_disks:
# Don't block on Volume errors if we're trying to
# delete the instance as we may be partially created
# or deleted
@ -1345,26 +1410,14 @@ class LibvirtDriver(driver.ComputeDriver):
'exc': encodeutils.exception_to_unicode(exc)},
instance=instance)
if destroy_disks:
if cleanup_instance_disks:
# NOTE(haomai): destroy volumes if needed
if CONF.libvirt.images_type == 'lvm':
self._cleanup_lvm(instance, block_device_info)
if CONF.libvirt.images_type == 'rbd':
self._cleanup_rbd(instance)
is_shared_block_storage = False
if migrate_data and 'is_shared_block_storage' in migrate_data:
is_shared_block_storage = migrate_data.is_shared_block_storage
# NOTE(lyarwood): The following workaround allows operators to ensure
# that non-shared instance directories are removed after an evacuation
# or revert resize when using the shared RBD imagebackend. This
# workaround is not required when cleaning up migrations that provide
# migrate_data to this method as the existing is_shared_block_storage
# conditional will cause the instance directory to be removed.
if ((destroy_disks or is_shared_block_storage) or
(CONF.workarounds.ensure_libvirt_rbd_instance_dir_cleanup and
CONF.libvirt.images_type == 'rbd')):
if cleanup_instance_dir:
attempts = int(instance.system_metadata.get('clean_attempts',
'0'))
success = self.delete_instance_files(instance)
@ -3549,9 +3602,10 @@ class LibvirtDriver(driver.ComputeDriver):
gen_confdrive = functools.partial(self._create_configdrive,
context, instance,
injection_info)
self._create_image(context, instance, disk_info['mapping'],
injection_info=injection_info,
block_device_info=block_device_info)
created_instance_dir, created_disks = self._create_image(
context, instance, disk_info['mapping'],
injection_info=injection_info,
block_device_info=block_device_info)
# Required by Quobyte CI
self._ensure_console_log_for_instance(instance)
@ -3567,8 +3621,9 @@ class LibvirtDriver(driver.ComputeDriver):
context, xml, instance, network_info,
block_device_info=block_device_info,
post_xml_callback=gen_confdrive,
destroy_disks_on_failure=True,
power_on=power_on)
power_on=power_on,
cleanup_instance_dir=created_instance_dir,
cleanup_instance_disks=created_disks)
LOG.debug("Guest created on hypervisor", instance=instance)
def _wait_for_boot():
@ -3859,8 +3914,17 @@ class LibvirtDriver(driver.ComputeDriver):
def raw(fname):
return image(fname, image_type='raw')
created_instance_dir = True
# ensure directories exist and are writable
fileutils.ensure_tree(libvirt_utils.get_instance_path(instance))
instance_dir = libvirt_utils.get_instance_path(instance)
if os.path.exists(instance_dir):
LOG.debug("Instance directory exists: not creating",
instance=instance)
created_instance_dir = False
else:
LOG.debug("Creating instance directory", instance=instance)
fileutils.ensure_tree(libvirt_utils.get_instance_path(instance))
LOG.info('Creating image', instance=instance)
@ -3902,6 +3966,10 @@ class LibvirtDriver(driver.ComputeDriver):
'kernel_id': instance.kernel_id,
'ramdisk_id': instance.ramdisk_id}
# NOTE(mdbooth): kernel and ramdisk, if they are defined, are hardcoded
# to use raw, which means they will always be cleaned up with the
# instance directory. We must not consider them for created_disks,
# which may not be using the instance directory.
if disk_images['kernel_id']:
fname = imagecache.get_cache_fname(disk_images['kernel_id'])
raw('kernel').cache(fetch_func=libvirt_utils.fetch_raw_image,
@ -3921,10 +3989,9 @@ class LibvirtDriver(driver.ComputeDriver):
uid = pwd.getpwnam('root').pw_uid
nova.privsep.path.chown(image('disk').path, uid=uid)
self._create_and_inject_local_root(context, instance,
booted_from_volume, suffix,
disk_images, injection_info,
fallback_from_host)
created_disks = self._create_and_inject_local_root(
context, instance, booted_from_volume, suffix, disk_images,
injection_info, fallback_from_host)
# Lookup the filesystem type if required
os_type_with_default = nova.privsep.fs.get_fs_type_for_os_type(
@ -3938,6 +4005,9 @@ class LibvirtDriver(driver.ComputeDriver):
ephemeral_gb = instance.flavor.ephemeral_gb
if 'disk.local' in disk_mapping:
disk_image = image('disk.local')
# Short circuit the exists() tests if we already created a disk
created_disks = created_disks or not disk_image.exists()
fn = functools.partial(self._create_ephemeral,
fs_label='ephemeral0',
os_type=instance.os_type,
@ -3954,6 +4024,8 @@ class LibvirtDriver(driver.ComputeDriver):
for idx, eph in enumerate(driver.block_device_info_get_ephemerals(
block_device_info)):
disk_image = image(blockinfo.get_eph_disk(idx))
# Short circuit the exists() tests if we already created a disk
created_disks = created_disks or not disk_image.exists()
specified_fs = eph.get('guest_format')
if specified_fs and not self.is_supported_fs_format(specified_fs):
@ -3976,15 +4048,25 @@ class LibvirtDriver(driver.ComputeDriver):
if swap_mb > 0:
size = swap_mb * units.Mi
image('disk.swap').cache(fetch_func=self._create_swap,
context=context,
filename="swap_%s" % swap_mb,
size=size,
swap_mb=swap_mb)
swap = image('disk.swap')
# Short circuit the exists() tests if we already created a disk
created_disks = created_disks or not swap.exists()
swap.cache(fetch_func=self._create_swap, context=context,
filename="swap_%s" % swap_mb,
size=size, swap_mb=swap_mb)
if created_disks:
LOG.debug('Created local disks', instance=instance)
else:
LOG.debug('Did not create local disks', instance=instance)
return (created_instance_dir, created_disks)
def _create_and_inject_local_root(self, context, instance,
booted_from_volume, suffix, disk_images,
injection_info, fallback_from_host):
created_disks = False
# File injection only if needed
need_inject = (not configdrive.required_by(instance) and
injection_info is not None and
@ -4002,6 +4084,8 @@ class LibvirtDriver(driver.ComputeDriver):
backend = self.image_backend.by_name(instance, 'disk' + suffix,
CONF.libvirt.images_type)
created_disks = not backend.exists()
if instance.task_state == task_states.RESIZE_FINISH:
backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
if backend.SUPPORTS_CLONE:
@ -4039,6 +4123,8 @@ class LibvirtDriver(driver.ComputeDriver):
LOG.warning('File injection into a boot from volume '
'instance is not supported', instance=instance)
return created_disks
def _create_configdrive(self, context, instance, injection_info,
rescue=False):
# As this method being called right after the definition of a
@ -6363,21 +6449,26 @@ class LibvirtDriver(driver.ComputeDriver):
for vif in network_info if vif.get('active', True) is False]
def _cleanup_failed_start(self, context, instance, network_info,
block_device_info, guest, destroy_disks):
block_device_info, guest,
cleanup_instance_dir=False,
cleanup_instance_disks=False):
try:
if guest and guest.is_active():
guest.poweroff()
finally:
self.cleanup(context, instance, network_info=network_info,
block_device_info=block_device_info,
destroy_disks=destroy_disks)
self._cleanup(context, instance, network_info,
block_device_info=block_device_info,
destroy_vifs=True,
cleanup_instance_dir=cleanup_instance_dir,
cleanup_instance_disks=cleanup_instance_disks)
def _create_domain_and_network(self, context, xml, instance, network_info,
block_device_info=None, power_on=True,
vifs_already_plugged=False,
post_xml_callback=None,
destroy_disks_on_failure=False,
external_events=None):
external_events=None,
cleanup_instance_dir=False,
cleanup_instance_disks=False):
"""Do required network setup and create domain."""
timeout = CONF.vif_plugging_timeout
@ -6405,9 +6496,10 @@ class LibvirtDriver(driver.ComputeDriver):
# Neutron reported failure and we didn't swallow it, so
# bail here
with excutils.save_and_reraise_exception():
self._cleanup_failed_start(context, instance, network_info,
block_device_info, guest,
destroy_disks_on_failure)
self._cleanup_failed_start(
context, instance, network_info, block_device_info, guest,
cleanup_instance_dir=cleanup_instance_dir,
cleanup_instance_disks=cleanup_instance_disks)
except eventlet.timeout.Timeout:
# We never heard from Neutron
LOG.warning('Timeout waiting for %(events)s for '
@ -6418,18 +6510,19 @@ class LibvirtDriver(driver.ComputeDriver):
'task_state': instance.task_state},
instance=instance)
if CONF.vif_plugging_is_fatal:
self._cleanup_failed_start(context, instance, network_info,
block_device_info, guest,
destroy_disks_on_failure)
self._cleanup_failed_start(
context, instance, network_info, block_device_info, guest,
cleanup_instance_dir=cleanup_instance_dir,
cleanup_instance_disks=cleanup_instance_disks)
raise exception.VirtualInterfaceCreateException()
except Exception:
# Any other error, be sure to clean up
LOG.error('Failed to start libvirt guest', instance=instance)
with excutils.save_and_reraise_exception():
self._cleanup_failed_start(context, instance, network_info,
block_device_info, guest,
destroy_disks_on_failure)
self._cleanup_failed_start(
context, instance, network_info, block_device_info, guest,
cleanup_instance_dir=cleanup_instance_dir,
cleanup_instance_disks=cleanup_instance_disks)
# Resume only if domain has been paused
if pause:
guest.resume()