From 95e979d90722293916ff7163fe25f835f785f39e Mon Sep 17 00:00:00 2001 From: Lucian Petrut Date: Fri, 9 Feb 2018 09:39:10 +0200 Subject: [PATCH] Lock snapshot/destroy operations At some point, we've removed the instance snapshot lock, allowing destroy requests to proceed immediately. We assumed that this should be fine as long as we're stopping pending WMI jobs. The issue is that the snapshot operation is still not fully preemptive. VHD related operations (e.g. copy, merge, upload to glance) are not preemptive at the moment, for which reason destroy requests will fail due to file locks. Unless the new 'force_destroy_instances' config option is enabled (disabled by default), we're locking the destroy/snapshot operations. This partially reverts commit e4208017feb92de8bfbe1c3b28d6b6e3cea7ab2c. Closes-Bug: #1748394 Change-Id: I3119c5bc5a714be867af19510dfab576f354835b --- compute_hyperv/nova/conf.py | 7 ++++++ compute_hyperv/nova/constants.py | 4 ++++ compute_hyperv/nova/snapshotops.py | 18 +++++++++++++++ compute_hyperv/nova/vmops.py | 22 +++++++++++++++++-- compute_hyperv/tests/unit/test_snapshotops.py | 17 ++++++++++++++ compute_hyperv/tests/unit/test_vmops.py | 7 ++++-- 6 files changed, 71 insertions(+), 4 deletions(-) diff --git a/compute_hyperv/nova/conf.py b/compute_hyperv/nova/conf.py index 8d683849..a3d64228 100644 --- a/compute_hyperv/nova/conf.py +++ b/compute_hyperv/nova/conf.py @@ -54,6 +54,13 @@ hyperv_opts = [ default=True, help="Allow the VM the failback to its original host once it " "is available."), + cfg.BoolOpt('force_destroy_instances', + default=False, + help="If this option is enabled, instance destroy requests " + "are executed immediately, regardless of instance " + "pending tasks. In some situations, the destroy " + "operation will fail (e.g. due to file locks), " + "requiring subsequent retries."), ] CONF = nova.conf.CONF diff --git a/compute_hyperv/nova/constants.py b/compute_hyperv/nova/constants.py index 937ece8e..a9c7fe6f 100644 --- a/compute_hyperv/nova/constants.py +++ b/compute_hyperv/nova/constants.py @@ -110,3 +110,7 @@ OPTIONAL = "optional" IMAGE_PROP_VTPM = "os_vtpm" IMAGE_PROP_VTPM_SHIELDED = "os_shielded_vm" + +# We have to make sure that such locks are not used outside the driver in +# order to avoid deadlocks. For this reason, we'll use the 'hv-' scope. +SNAPSHOT_LOCK_TEMPLATE = "%(instance_uuid)s-hv-snapshot" diff --git a/compute_hyperv/nova/snapshotops.py b/compute_hyperv/nova/snapshotops.py index 6f760cb8..108e5635 100644 --- a/compute_hyperv/nova/snapshotops.py +++ b/compute_hyperv/nova/snapshotops.py @@ -19,10 +19,14 @@ Management class for VM snapshot operations. import os from nova.compute import task_states +from nova import exception from nova.image import glance +from nova import utils +from os_win import exceptions as os_win_exc from os_win import utilsfactory from oslo_log import log as logging +from compute_hyperv.nova import constants from compute_hyperv.nova import pathutils LOG = logging.getLogger(__name__) @@ -45,6 +49,20 @@ class SnapshotOps(object): purge_props=False) def snapshot(self, context, instance, image_id, update_task_state): + # This operation is not fully preemptive at the moment. We're locking + # it as well as the destroy operation (if configured to do so). 
+        @utils.synchronized(constants.SNAPSHOT_LOCK_TEMPLATE %
+                            dict(instance_uuid=instance.uuid))
+        def instance_synchronized_snapshot():
+            self._snapshot(context, instance, image_id, update_task_state)
+
+        try:
+            instance_synchronized_snapshot()
+        except os_win_exc.HyperVVMNotFoundException:
+            # The instance might disappear before the operation starts.
+            raise exception.InstanceNotFound(instance_id=instance.uuid)
+
+    def _snapshot(self, context, instance, image_id, update_task_state):
         """Create snapshot from a running VM instance."""
         instance_name = instance.name
 
diff --git a/compute_hyperv/nova/vmops.py b/compute_hyperv/nova/vmops.py
index c988a2bc..c8252ea2 100644
--- a/compute_hyperv/nova/vmops.py
+++ b/compute_hyperv/nova/vmops.py
@@ -856,8 +856,26 @@ class VMOps(object):
         if backup_location:
             self._pathutils.check_remove_dir(backup_location)
 
-    def destroy(self, instance, network_info, block_device_info,
-                destroy_disks=True, cleanup_migration_files=True):
+    def destroy(self, instance, *args, **kwargs):
+        # Nova allows destroying instances regardless of pending tasks.
+        # In some cases, we may not be able to properly delete instances
+        # that have a pending task (e.g. when snapshotting, due to file
+        # locks).
+        #
+        # We may add other locks here for operations that are not preemptive.
+        # We should not rely on instance task states, which may be stale.
+        @utils.synchronized(constants.SNAPSHOT_LOCK_TEMPLATE %
+                            dict(instance_uuid=instance.uuid))
+        def synchronized_destroy():
+            self._destroy(instance, *args, **kwargs)
+
+        if CONF.hyperv.force_destroy_instances:
+            self._destroy(instance, *args, **kwargs)
+        else:
+            synchronized_destroy()
+
+    def _destroy(self, instance, network_info, block_device_info,
+                 destroy_disks=True, cleanup_migration_files=True):
         instance_name = instance.name
         LOG.info("Got request to destroy instance", instance=instance)
 
diff --git a/compute_hyperv/tests/unit/test_snapshotops.py b/compute_hyperv/tests/unit/test_snapshotops.py
index 1146e602..7b1480fe 100644
--- a/compute_hyperv/tests/unit/test_snapshotops.py
+++ b/compute_hyperv/tests/unit/test_snapshotops.py
@@ -17,6 +17,8 @@ import os
 
 import mock
 from nova.compute import task_states
+from nova import exception
+from os_win import exceptions as os_win_exc
 
 from compute_hyperv.nova import snapshotops
 from compute_hyperv.tests import fake_instance
@@ -123,3 +125,18 @@ class SnapshotOpsTestCase(test_base.HyperVBaseTestCase):
 
     def test_snapshot_no_base_disk(self):
         self._test_snapshot(base_disk_path=None)
+
+    @mock.patch.object(snapshotops.SnapshotOps, '_snapshot')
+    def test_snapshot_instance_not_found(self, mock_snapshot):
+        mock_instance = fake_instance.fake_instance_obj(self.context)
+        mock_snapshot.side_effect = os_win_exc.HyperVVMNotFoundException(
+            vm_name=mock_instance.name)
+
+        self.assertRaises(exception.InstanceNotFound,
+                          self._snapshotops.snapshot,
+                          self.context, mock_instance, mock.sentinel.image_id,
+                          mock.sentinel.update_task_state)
+
+        mock_snapshot.assert_called_once_with(self.context, mock_instance,
+                                              mock.sentinel.image_id,
+                                              mock.sentinel.update_task_state)
diff --git a/compute_hyperv/tests/unit/test_vmops.py b/compute_hyperv/tests/unit/test_vmops.py
index 1933405d..bb9d201d 100644
--- a/compute_hyperv/tests/unit/test_vmops.py
+++ b/compute_hyperv/tests/unit/test_vmops.py
@@ -1315,7 +1315,7 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase):
         self._pathutils.check_remove_dir.assert_has_calls(
             exp_check_remove_dir_calls)
 
-    @ddt.data({},
+    @ddt.data({"force_destroy": True},
               {'vm_exists': False, 'planned_vm_exists': False},
               {'vm_exists': False, 'planned_vm_exists': True})
     @ddt.unpack
@@ -1324,7 +1324,10 @@
     @mock.patch('compute_hyperv.nova.vmops.VMOps.unplug_vifs')
     def test_destroy(self, mock_unplug_vifs, mock_power_off,
                      mock_delete_disk_files, vm_exists=True,
-                     planned_vm_exists=False):
+                     planned_vm_exists=False,
+                     force_destroy=False):
+        self.flags(force_destroy_instances=force_destroy, group="hyperv")
+
         mock_instance = fake_instance.fake_instance_obj(self.context)
         self._vmops._vmutils.vm_exists.return_value = vm_exists
         self._vmops._migrutils.planned_vm_exists.return_value = (
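
For readers skimming the patch, the idea can be summarized with a minimal standalone sketch. This is only an illustration of the locking pattern above, not the driver's actual code: it uses a plain threading lock in place of nova's utils.synchronized decorator, and the do_snapshot / do_destroy callables are hypothetical stand-ins for the real _snapshot / _destroy methods.

import threading
from collections import defaultdict

# Per-instance locks, standing in for utils.synchronized() keyed on
# "<instance_uuid>-hv-snapshot" (constants.SNAPSHOT_LOCK_TEMPLATE).
_locks = defaultdict(threading.Lock)


def _instance_lock(instance_uuid):
    return _locks["%s-hv-snapshot" % instance_uuid]


def snapshot(instance_uuid, do_snapshot):
    # Snapshots always hold the per-instance lock: the underlying VHD
    # copy/merge/upload steps cannot be preempted once started.
    with _instance_lock(instance_uuid):
        do_snapshot()


def destroy(instance_uuid, do_destroy, force_destroy=False):
    # With force_destroy enabled, destroy proceeds immediately (and may
    # fail on file locks); otherwise it waits for any snapshot that is
    # currently holding the lock.
    if force_destroy:
        do_destroy()
    else:
        with _instance_lock(instance_uuid):
            do_destroy()

With the default configuration, a destroy request issued mid-snapshot simply blocks until the snapshot's VHD operations finish, instead of failing on their file locks and requiring a retry.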