XenAPI: Fix race-condition with cached images.
The core problem is that XenServer's `VDI.copy` call drops the destination file directly into the SR. This means that half-completed files are visible with no way to distinguish these from fully-copied files. We had some code that attempted to mitigate this issue by checking physical_utilisation against an expected value. The problem with this code is that it didn't account for VDI chaining where the physical_utilisation would not necessarily match the parent. The net effect of this was that 'cloned' VDIs would never be found because their physical_utilisation was far below what was expected. The work around is to create our own `_safe_copy_vdi` which is isolated and atomic. Long term, `VDI.copy` should be fixed so that half-completed files are never stored in the SR. Change-Id: I6eb3cb5259f9ee1c7394e58f76105a8b39bfc720
This commit is contained in:
parent
c577144166
commit
4f3291e379
|
@ -288,6 +288,15 @@ class XenAPIVMTestCase(stubs.XenAPITestBase):
|
|||
self.stubs.Set(vmops.VMOps, 'inject_instance_metadata',
|
||||
fake_inject_instance_metadata)
|
||||
|
||||
def fake_safe_copy_vdi(session, sr_ref, instance, vdi_to_copy_ref):
|
||||
name_label = "fakenamelabel"
|
||||
disk_type = "fakedisktype"
|
||||
virtual_size = 777
|
||||
return vm_utils.create_vdi(
|
||||
session, sr_ref, instance, name_label, disk_type,
|
||||
virtual_size)
|
||||
self.stubs.Set(vm_utils, '_safe_copy_vdi', fake_safe_copy_vdi)
|
||||
|
||||
def tearDown(self):
|
||||
super(XenAPIVMTestCase, self).tearDown()
|
||||
fake_image.FakeImageService_reset()
|
||||
|
|
|
@ -428,11 +428,63 @@ def get_vdis_for_instance(context, session, instance, name_label, image,
|
|||
image_type)
|
||||
|
||||
|
||||
def _copy_vdi(session, sr_ref, vdi_to_copy_ref):
|
||||
"""Copy a VDI and return the new VDIs reference."""
|
||||
vdi_ref = session.call_xenapi('VDI.copy', vdi_to_copy_ref, sr_ref)
|
||||
LOG.debug(_('Copied VDI %(vdi_ref)s from VDI '
|
||||
'%(vdi_to_copy_ref)s on %(sr_ref)s.') % locals())
|
||||
@contextlib.contextmanager
|
||||
def _dummy_vm(session, instance, vdi_ref):
|
||||
"""This creates a temporary VM so that we can snapshot a VDI.
|
||||
|
||||
VDI's can't be snapshotted directly since the API expects a `vm_ref`. To
|
||||
work around this, we need to create a temporary VM and then map the VDI to
|
||||
the VM using a temporary VBD.
|
||||
"""
|
||||
name_label = "dummy"
|
||||
vm_ref = create_vm(session, instance, name_label, None, None)
|
||||
try:
|
||||
vbd_ref = create_vbd(session, vm_ref, vdi_ref, 'autodetect',
|
||||
read_only=True)
|
||||
try:
|
||||
yield vm_ref
|
||||
finally:
|
||||
try:
|
||||
destroy_vbd(session, vbd_ref)
|
||||
except volume_utils.StorageError:
|
||||
# destroy_vbd() will log error
|
||||
pass
|
||||
finally:
|
||||
destroy_vm(session, instance, vm_ref)
|
||||
|
||||
|
||||
def _safe_copy_vdi(session, sr_ref, instance, vdi_to_copy_ref):
|
||||
"""Copy a VDI and return the new VDIs reference.
|
||||
|
||||
This function differs from the XenAPI `VDI.copy` call in that the copy is
|
||||
atomic and isolated, meaning we don't see half-downloaded images. It
|
||||
accomplishes this by copying the VDI's into a temporary directory and then
|
||||
atomically renaming them into the SR when the copy is completed.
|
||||
|
||||
The correct long term solution is to fix `VDI.copy` so that it is atomic
|
||||
and isolated.
|
||||
"""
|
||||
with _dummy_vm(session, instance, vdi_to_copy_ref) as vm_ref:
|
||||
label = "snapshot"
|
||||
|
||||
with snapshot_attached_here(
|
||||
session, instance, vm_ref, label) as vdi_uuids:
|
||||
params = {'sr_path': get_sr_path(session),
|
||||
'vdi_uuids': vdi_uuids,
|
||||
'uuid_stack': _make_uuid_stack()}
|
||||
|
||||
kwargs = {'params': pickle.dumps(params)}
|
||||
result = session.call_plugin(
|
||||
'workarounds', 'safe_copy_vdis', kwargs)
|
||||
imported_vhds = jsonutils.loads(result)
|
||||
|
||||
root_uuid = imported_vhds['root']['uuid']
|
||||
|
||||
# TODO(sirp): for safety, we should probably re-scan the SR after every
|
||||
# call to a dom0 plugin, since there is a possibility that the underlying
|
||||
# VHDs changed
|
||||
scan_default_sr(session)
|
||||
vdi_ref = session.call_xenapi('VDI.get_by_uuid', root_uuid)
|
||||
return vdi_ref
|
||||
|
||||
|
||||
|
@ -526,22 +578,7 @@ def find_cached_image(session, image_id, sr_ref):
|
|||
"""Returns the vdi-ref of the cached image."""
|
||||
for vdi_ref, vdi_rec in _get_all_vdis_in_sr(session, sr_ref):
|
||||
other_config = vdi_rec['other_config']
|
||||
|
||||
try:
|
||||
image_id_match = other_config['image-id'] == image_id
|
||||
except KeyError:
|
||||
image_id_match = False
|
||||
|
||||
# NOTE(sirp): `VDI.copy` stores the partially-completed file in the SR.
|
||||
# In order to avoid these half-baked files, we compare its current size
|
||||
# to the expected size pulled from the original cache file.
|
||||
try:
|
||||
size_match = (other_config['expected_physical_utilisation'] ==
|
||||
vdi_rec['physical_utilisation'])
|
||||
except KeyError:
|
||||
size_match = False
|
||||
|
||||
if image_id_match and size_match:
|
||||
if image_id == other_config.get('image-id'):
|
||||
return vdi_ref
|
||||
|
||||
return None
|
||||
|
@ -764,24 +801,16 @@ def _create_cached_image(context, session, instance, name_label,
|
|||
session.call_xenapi('VDI.add_to_other_config',
|
||||
root_vdi_ref, 'image-id', str(image_id))
|
||||
|
||||
for vdi_type, vdi in vdis.iteritems():
|
||||
vdi_ref = session.call_xenapi('VDI.get_by_uuid',
|
||||
vdi['uuid'])
|
||||
|
||||
vdi_rec = session.call_xenapi('VDI.get_record', vdi_ref)
|
||||
session.call_xenapi('VDI.add_to_other_config',
|
||||
vdi_ref, 'expected_physical_utilisation',
|
||||
vdi_rec['physical_utilisation'])
|
||||
|
||||
if vdi_type == 'swap':
|
||||
session.call_xenapi('VDI.add_to_other_config',
|
||||
root_vdi_ref, 'swap-disk',
|
||||
str(vdi['uuid']))
|
||||
swap_vdi = vdis.get('swap')
|
||||
if swap_vdi:
|
||||
session.call_xenapi(
|
||||
'VDI.add_to_other_config', root_vdi_ref, 'swap-disk',
|
||||
str(swap_vdi['uuid']))
|
||||
|
||||
if FLAGS.use_cow_images and sr_type == 'ext':
|
||||
new_vdi_ref = _clone_vdi(session, root_vdi_ref)
|
||||
else:
|
||||
new_vdi_ref = _copy_vdi(session, sr_ref, root_vdi_ref)
|
||||
new_vdi_ref = _safe_copy_vdi(session, sr_ref, instance, root_vdi_ref)
|
||||
|
||||
# Set the name label for the image we just created and remove image id
|
||||
# field from other-config.
|
||||
|
@ -799,7 +828,8 @@ def _create_cached_image(context, session, instance, name_label,
|
|||
swap_disk_uuid = vdi_rec['other_config']['swap-disk']
|
||||
swap_vdi_ref = session.call_xenapi('VDI.get_by_uuid',
|
||||
swap_disk_uuid)
|
||||
new_swap_vdi_ref = _copy_vdi(session, sr_ref, swap_vdi_ref)
|
||||
new_swap_vdi_ref = _safe_copy_vdi(
|
||||
session, sr_ref, instance, swap_vdi_ref)
|
||||
new_swap_vdi_uuid = session.call_xenapi('VDI.get_uuid',
|
||||
new_swap_vdi_ref)
|
||||
vdis['swap'] = dict(uuid=new_swap_vdi_uuid, file=None)
|
||||
|
|
|
@ -33,6 +33,7 @@ rm -rf $RPM_BUILD_ROOT
|
|||
/etc/xapi.d/plugins/kernel
|
||||
/etc/xapi.d/plugins/migration
|
||||
/etc/xapi.d/plugins/pluginlib_nova.py
|
||||
/etc/xapi.d/plugins/workarounds
|
||||
/etc/xapi.d/plugins/xenhost
|
||||
/etc/xapi.d/plugins/xenstore.py
|
||||
/etc/xapi.d/plugins/utils.py
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# Copyright (c) 2012 Openstack, LLC
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""Handle the uploading and downloading of images via Glance."""
|
||||
|
||||
import cPickle as pickle
|
||||
try:
|
||||
import json
|
||||
except ImportError:
|
||||
import simplejson as json
|
||||
import os
|
||||
import shutil
|
||||
|
||||
import XenAPIPlugin
|
||||
|
||||
import utils
|
||||
|
||||
#FIXME(sirp): should this use pluginlib from 5.6?
|
||||
from pluginlib_nova import *
|
||||
configure_logging('hacks')
|
||||
|
||||
|
||||
def _copy_vdis(sr_path, staging_path, vdi_uuids):
|
||||
seq_num = 0
|
||||
for vdi_uuid in vdi_uuids:
|
||||
src = os.path.join(sr_path, "%s.vhd" % vdi_uuid)
|
||||
dst = os.path.join(staging_path, "%d.vhd" % seq_num)
|
||||
shutil.copyfile(src, dst)
|
||||
seq_num += 1
|
||||
|
||||
|
||||
def safe_copy_vdis(session, args):
|
||||
params = pickle.loads(exists(args, 'params'))
|
||||
sr_path = params["sr_path"]
|
||||
vdi_uuids = params["vdi_uuids"]
|
||||
uuid_stack = params["uuid_stack"]
|
||||
|
||||
staging_path = utils.make_staging_area(sr_path)
|
||||
try:
|
||||
_copy_vdis(sr_path, staging_path, vdi_uuids)
|
||||
imported_vhds = utils.import_vhds(sr_path, staging_path, uuid_stack)
|
||||
finally:
|
||||
utils.cleanup_staging_area(staging_path)
|
||||
|
||||
# Right now, it's easier to return a single string via XenAPI,
|
||||
# so we'll json encode the list of VHDs.
|
||||
return json.dumps(imported_vhds)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
XenAPIPlugin.dispatch({'safe_copy_vdis': safe_copy_vdis})
|
Loading…
Reference in New Issue