diff --git a/nova/conf/compute.py b/nova/conf/compute.py index 6cae12383f72..3ebeaec999d7 100644 --- a/nova/conf/compute.py +++ b/nova/conf/compute.py @@ -524,6 +524,10 @@ Possible values: * $state_path/instances where state_path is a config option that specifies the top-level directory for maintaining nova's state. (default) or Any string representing directory path. + +Related options: + +* ``[workarounds]/ensure_libvirt_rbd_instance_dir_cleanup`` """), cfg.BoolOpt('instance_usage_audit', default=False, diff --git a/nova/conf/libvirt.py b/nova/conf/libvirt.py index 41675024a330..34c4e2d4c626 100644 --- a/nova/conf/libvirt.py +++ b/nova/conf/libvirt.py @@ -714,6 +714,7 @@ Related options: * virt.use_cow_images * images_volume_group +* [workarounds]/ensure_libvirt_rbd_instance_dir_cleanup """), cfg.StrOpt('images_volume_group', help=""" diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index a53247759019..11ff37d12ea0 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -146,6 +146,36 @@ Related options: * [filter_scheduler]/track_instance_changes also relies on upcalls from the compute service to the scheduler service. +"""), + + cfg.BoolOpt( + 'ensure_libvirt_rbd_instance_dir_cleanup', + default=False, + help=""" +Ensure the instance directory is removed during clean up when using rbd. + +When enabled this workaround will ensure that the instance directory is always +removed during cleanup on hosts using ``[libvirt]/images_type=rbd``. This +avoids the following bugs with evacuation and revert resize clean up that lead +to the instance directory remaining on the host: + +https://bugs.launchpad.net/nova/+bug/1414895 + +https://bugs.launchpad.net/nova/+bug/1761062 + +Both of these bugs can then result in ``DestinationDiskExists`` errors being +raised if the instances ever attempt to return to the host. + +.. warning:: Operators will need to ensure that the instance directory itself, + specified by ``[DEFAULT]/instances_path``, is not shared between computes + before enabling this workaround otherwise the console.log, kernels, ramdisks + and any additional files being used by the running instance will be lost. + +Related options: + +* ``compute_driver`` (libvirt) +* ``[libvirt]/images_type`` (rbd) +* ``instances_path`` """), ] diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 77101d784a21..bdc39e4271eb 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -16309,6 +16309,27 @@ class LibvirtConnTestCase(test.NoDBTestCase, self.assertTrue(instance.cleaned) save.assert_called_once_with() + @mock.patch.object(libvirt_driver.LibvirtDriver, 'unfilter_instance') + @mock.patch.object(libvirt_driver.LibvirtDriver, 'delete_instance_files', + return_value=True) + @mock.patch.object(objects.Instance, 'save') + @mock.patch.object(libvirt_driver.LibvirtDriver, '_undefine_domain') + def test_cleanup_instance_dir_with_rbd_workaround(self, + _undefine_domain, save, delete_instance_files, unfilter_instance): + self.flags(images_type='rbd', group='libvirt') + self.flags(ensure_libvirt_rbd_instance_dir_cleanup=True, + group='workarounds') + instance = objects.Instance(self.context, **self.test_instance) + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI()) + # destroy_disks=False here as check_instance_shared_storage_local call + # would return None when using the rbd imagebackend + drvr.cleanup(self.context, instance, network_info={}, + destroy_disks=False) + delete_instance_files.assert_called_once_with(instance) + self.assertEqual(1, int(instance.system_metadata['clean_attempts'])) + self.assertTrue(instance.cleaned) + save.assert_called_once_with() + @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_volume_encryption') @mock.patch.object(libvirt_driver.LibvirtDriver, '_use_native_luks') def test_swap_volume_native_luks_blocked(self, mock_use_native_luks, diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 2b0e21e24533..c549d52e5aec 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -1086,7 +1086,16 @@ class LibvirtDriver(driver.ComputeDriver): is_shared_block_storage = False if migrate_data and 'is_shared_block_storage' in migrate_data: is_shared_block_storage = migrate_data.is_shared_block_storage - if destroy_disks or is_shared_block_storage: + # NOTE(lyarwood): The following workaround allows operators to ensure + # that non-shared instance directories are removed after an evacuation + # or revert resize when using the shared RBD imagebackend. This + # workaround is not required when cleaning up migrations that provide + # migrate_data to this method as the existing is_shared_block_storage + # conditional will cause the instance directory to be removed. + if ((destroy_disks or is_shared_block_storage) or + (CONF.workarounds.ensure_libvirt_rbd_instance_dir_cleanup and + CONF.libvirt.images_type == 'rbd')): + attempts = int(instance.system_metadata.get('clean_attempts', '0')) success = self.delete_instance_files(instance) diff --git a/releasenotes/notes/bug-1414895-8f7d8da6499f8e94.yaml b/releasenotes/notes/bug-1414895-8f7d8da6499f8e94.yaml new file mode 100644 index 000000000000..8fa6d127461c --- /dev/null +++ b/releasenotes/notes/bug-1414895-8f7d8da6499f8e94.yaml @@ -0,0 +1,24 @@ +--- +other: + - | + The ``[workarounds]/ensure_libvirt_rbd_instance_dir_cleanup`` configuration + option has been introduced. This can be used by operators to ensure that + instance directories are always removed during cleanup within the Libvirt + driver while using ``[libvirt]/images_type = rbd``. This works around known + issues such as `bug 1414895`_ when cleaning up after an evacuation and + `bug 1761062`_ when reverting from an instance resize. + + Operators should be aware that this workaround only applies when using the + libvirt compute driver and rbd images_type as enabled by the following + configuration options: + + * ``[DEFAULT]/compute_driver = libvirt`` + * ``[libvirt]/images_type = rbd`` + + .. warning:: Operators will need to ensure that the instance directory + itself, specified by ``[DEFAULT]/instances_path``, is not shared between + computes before enabling this workaround, otherwise files associated with + running instances may be removed. + + .. _bug 1414895: https://bugs.launchpad.net/nova/+bug/1414895 + .. _bug 1761062: https://bugs.launchpad.net/nova/+bug/1761062