Lock snapshot/destroy operations

At some point, we removed the instance snapshot lock, allowing
destroy requests to proceed immediately. We assumed this would
be fine as long as we stopped pending WMI jobs.

The issue is that the snapshot operation is still not fully
preemptible. VHD-related operations (e.g. copy, merge, upload to
Glance) cannot be interrupted at the moment, so destroy requests
will fail due to file locks.

Unless the new 'force_destroy_instances' config option is enabled
(it is disabled by default), the destroy and snapshot operations are
now serialized by a per-instance lock.
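
For reference, the behavior described above can be reproduced with plain
oslo.concurrency, outside of Nova. This is a minimal sketch, not driver code:
the lock name format matches the template introduced by this change, while the
fake UUID, the sleep standing in for the VHD operations, and the helper names
are illustrative.

import threading
import time

from oslo_concurrency import lockutils


def _snapshot_lock_name(instance_uuid):
    # Same name format as the driver's lock template.
    return "%(instance_uuid)s-hv-snapshot" % {"instance_uuid": instance_uuid}


def snapshot(instance_uuid):
    @lockutils.synchronized(_snapshot_lock_name(instance_uuid))
    def _locked_snapshot():
        time.sleep(0.5)  # stand-in for the non-preemptible VHD operations
    _locked_snapshot()


def destroy(instance_uuid, force=False):
    if force:
        # 'force_destroy_instances' behavior: proceed immediately, at the
        # risk of failing on file locks and requiring retries.
        print("destroying %s immediately" % instance_uuid)
        return

    @lockutils.synchronized(_snapshot_lock_name(instance_uuid))
    def _locked_destroy():
        print("destroying %s after the snapshot released the lock"
              % instance_uuid)
    _locked_destroy()


snap = threading.Thread(target=snapshot, args=("fake-uuid",))
snap.start()
time.sleep(0.1)
destroy("fake-uuid")  # blocks until the snapshot thread finishes
snap.join()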

This partially reverts commit
e4208017fe.

Closes-Bug: #1748394

Change-Id: I3119c5bc5a714be867af19510dfab576f354835b
Author: Lucian Petrut, 2018-02-09 09:39:10 +02:00
Parent: 19917084d3
Commit: 95e979d907
6 changed files with 71 additions and 4 deletions


@@ -54,6 +54,13 @@ hyperv_opts = [
                 default=True,
                 help="Allow the VM the failback to its original host once it "
                      "is available."),
+    cfg.BoolOpt('force_destroy_instances',
+                default=False,
+                help="If this option is enabled, instance destroy requests "
+                     "are executed immediately, regardless of instance "
+                     "pending tasks. In some situations, the destroy "
+                     "operation will fail (e.g. due to file locks), "
+                     "requiring subsequent retries."),
 ]
 
 CONF = nova.conf.CONF
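
Usage note: the option lives in the 'hyperv' group, so operators enable it in
that section of the compute node's config file. Below is a sketch of how such
an option is registered and consulted with oslo.config; the option name, group
and default come from the diff, while the destroy() helper and its parameters
are hypothetical.

from oslo_config import cfg

CONF = cfg.CONF
CONF.register_opts(
    [cfg.BoolOpt('force_destroy_instances', default=False)],
    group='hyperv')


def destroy(instance_uuid, synchronized_destroy, unsynchronized_destroy):
    if CONF.hyperv.force_destroy_instances:
        # Operator opted in: destroy immediately, accepting possible
        # file-lock failures and subsequent retries.
        unsynchronized_destroy(instance_uuid)
    else:
        # Default: wait for a pending snapshot to release its lock.
        synchronized_destroy(instance_uuid)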


@@ -110,3 +110,7 @@ OPTIONAL = "optional"
 IMAGE_PROP_VTPM = "os_vtpm"
 IMAGE_PROP_VTPM_SHIELDED = "os_shielded_vm"
+
+# We have to make sure that such locks are not used outside the driver in
+# order to avoid deadlocks. For this reason, we'll use the 'hv-' scope.
+SNAPSHOT_LOCK_TEMPLATE = "%(instance_uuid)s-hv-snapshot"
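
The template yields one lock name per instance; both the snapshot and destroy
code paths derive the same name for a given instance, which is what makes them
mutually exclusive. A quick expansion with a placeholder UUID:

SNAPSHOT_LOCK_TEMPLATE = "%(instance_uuid)s-hv-snapshot"

lock_name = SNAPSHOT_LOCK_TEMPLATE % dict(
    instance_uuid="11111111-2222-3333-4444-555555555555")
# lock_name == "11111111-2222-3333-4444-555555555555-hv-snapshot"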


@@ -19,10 +19,14 @@ Management class for VM snapshot operations.
 import os
 
 from nova.compute import task_states
+from nova import exception
 from nova.image import glance
+from nova import utils
+from os_win import exceptions as os_win_exc
 from os_win import utilsfactory
 from oslo_log import log as logging
 
+from compute_hyperv.nova import constants
 from compute_hyperv.nova import pathutils
 
 LOG = logging.getLogger(__name__)

@@ -45,6 +49,20 @@ class SnapshotOps(object):
                              purge_props=False)
 
     def snapshot(self, context, instance, image_id, update_task_state):
+        # This operation is not fully preemptible at the moment. We're
+        # locking it, as well as the destroy operation (if so configured).
+        @utils.synchronized(constants.SNAPSHOT_LOCK_TEMPLATE %
+                            dict(instance_uuid=instance.uuid))
+        def instance_synchronized_snapshot():
+            self._snapshot(context, instance, image_id, update_task_state)
+
+        try:
+            instance_synchronized_snapshot()
+        except os_win_exc.HyperVVMNotFoundException:
+            # The instance might disappear before the operation starts.
+            raise exception.InstanceNotFound(instance_id=instance.uuid)
+
+    def _snapshot(self, context, instance, image_id, update_task_state):
         """Create snapshot from a running VM instance."""
         instance_name = instance.name
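
A note on the shape of this code: the lock name embeds instance.uuid, which is
only known at call time, so the synchronized decorator is applied to a nested
function created per call rather than to the method itself. The standalone
sketch below uses oslo.concurrency directly; treating nova.utils.synchronized
as a thin prefix-adding wrapper over the same lockutils machinery is an
assumption worth verifying against your Nova version.

from oslo_concurrency import lockutils


def snapshot(instance_uuid):
    # The decorator is built per call, so the lock name can include the
    # instance UUID.
    @lockutils.synchronized("%s-hv-snapshot" % instance_uuid)
    def _instance_synchronized_snapshot():
        pass  # the real work (self._snapshot(...)) would happen here

    _instance_synchronized_snapshot()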


@@ -856,8 +856,26 @@ class VMOps(object):
         if backup_location:
             self._pathutils.check_remove_dir(backup_location)
 
-    def destroy(self, instance, network_info, block_device_info,
-                destroy_disks=True, cleanup_migration_files=True):
+    def destroy(self, instance, *args, **kwargs):
+        # Nova allows destroying instances regardless of pending tasks.
+        # In some cases, we may not be able to properly delete instances
+        # that have a pending task (e.g. when snapshotting, due to file
+        # locks).
+        #
+        # We may add other locks here for non-preemptible operations.
+        # We should not rely on instance task states, which may be stuck.
+        @utils.synchronized(constants.SNAPSHOT_LOCK_TEMPLATE %
+                            dict(instance_uuid=instance.uuid))
+        def synchronized_destroy():
+            self._destroy(instance, *args, **kwargs)
+
+        if CONF.hyperv.force_destroy_instances:
+            self._destroy(instance, *args, **kwargs)
+        else:
+            synchronized_destroy()
+
+    def _destroy(self, instance, network_info, block_device_info,
+                 destroy_disks=True, cleanup_migration_files=True):
         instance_name = instance.name
         LOG.info("Got request to destroy instance", instance=instance)
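
One property worth spelling out: the lock is per instance. Names derived from
different UUIDs map to different locks, so a long-running snapshot of one
instance never delays the destroy of another. A tiny check with placeholder
UUIDs:

SNAPSHOT_LOCK_TEMPLATE = "%(instance_uuid)s-hv-snapshot"

name_a = SNAPSHOT_LOCK_TEMPLATE % dict(instance_uuid="uuid-a")
name_b = SNAPSHOT_LOCK_TEMPLATE % dict(instance_uuid="uuid-b")
assert name_a != name_b  # distinct names -> independent locks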


@@ -17,6 +17,8 @@ import os
 import mock
 from nova.compute import task_states
+from nova import exception
+from os_win import exceptions as os_win_exc
 
 from compute_hyperv.nova import snapshotops
 from compute_hyperv.tests import fake_instance

@@ -123,3 +125,18 @@ class SnapshotOpsTestCase(test_base.HyperVBaseTestCase):
     def test_snapshot_no_base_disk(self):
         self._test_snapshot(base_disk_path=None)
+
+    @mock.patch.object(snapshotops.SnapshotOps, '_snapshot')
+    def test_snapshot_instance_not_found(self, mock_snapshot):
+        mock_instance = fake_instance.fake_instance_obj(self.context)
+        mock_snapshot.side_effect = os_win_exc.HyperVVMNotFoundException(
+            vm_name=mock_instance.name)
+
+        self.assertRaises(exception.InstanceNotFound,
+                          self._snapshotops.snapshot,
+                          self.context, mock_instance, mock.sentinel.image_id,
+                          mock.sentinel.update_task_state)
+
+        mock_snapshot.assert_called_once_with(self.context, mock_instance,
+                                              mock.sentinel.image_id,
+                                              mock.sentinel.update_task_state)


@@ -1315,7 +1315,7 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase):
         self._pathutils.check_remove_dir.assert_has_calls(
             exp_check_remove_dir_calls)
 
-    @ddt.data({},
+    @ddt.data({"force_destroy": True},
               {'vm_exists': False, 'planned_vm_exists': False},
               {'vm_exists': False, 'planned_vm_exists': True})
     @ddt.unpack

@@ -1324,7 +1324,10 @@
     @mock.patch('compute_hyperv.nova.vmops.VMOps.unplug_vifs')
     def test_destroy(self, mock_unplug_vifs, mock_power_off,
                      mock_delete_disk_files, vm_exists=True,
-                     planned_vm_exists=False):
+                     planned_vm_exists=False,
+                     force_destroy=False):
+        self.flags(force_destroy_instances=force_destroy, group="hyperv")
         mock_instance = fake_instance.fake_instance_obj(self.context)
         self._vmops._vmutils.vm_exists.return_value = vm_exists
         self._vmops._migrutils.planned_vm_exists.return_value = (