Fix race conditions between imagebackend and imagecache
The race may occur in the situation: * There is a base file that is not used for a long time enough to be removed by imagecache. * imagebackend is provisioning a virtual disk from the base file. * imagecache is removing the base file. Then, the base file is removed even though it is about to be used. To fix this, these changes are in this patch: * A new function imagecache.refresh_timestamp(base_file) updates the owner and mtime of the base file with the lock dedicated to the base file. * imagebacked calls refresh_timestamp(base_file) before provision a disk from the base file. * imagecache.ImageCacheManager._remove_base_file(base_file) uses the same lock as used by refresh_timestamp() Closes-Bug: #1256838 Change-Id: I7c897cf6071d87a2c4532fb3a73863d649d02782
This commit is contained in:
parent
7e4075301c
commit
2593469103
|
@ -29,6 +29,7 @@ from nova.tests import fake_processutils
|
|||
from nova.tests.virt.libvirt import fake_libvirt_utils
|
||||
from nova import unit
|
||||
from nova.virt.libvirt import imagebackend
|
||||
from nova.virt.libvirt import imagecache
|
||||
|
||||
CONF = cfg.CONF
|
||||
|
||||
|
@ -65,6 +66,7 @@ class _ImageTestCase(object):
|
|||
fake_libvirt_utils))
|
||||
|
||||
def test_cache(self):
|
||||
self.mox.StubOutWithMock(imagecache, 'refresh_timestamp')
|
||||
self.mox.StubOutWithMock(os.path, 'exists')
|
||||
if self.OLD_STYLE_INSTANCE_PATH:
|
||||
os.path.exists(self.OLD_STYLE_INSTANCE_PATH).AndReturn(False)
|
||||
|
@ -74,6 +76,7 @@ class _ImageTestCase(object):
|
|||
fn(target=self.TEMPLATE_PATH)
|
||||
self.mox.StubOutWithMock(imagebackend.fileutils, 'ensure_tree')
|
||||
imagebackend.fileutils.ensure_tree(self.TEMPLATE_DIR)
|
||||
imagecache.refresh_timestamp(self.TEMPLATE_PATH)
|
||||
self.mox.ReplayAll()
|
||||
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
|
@ -83,12 +86,14 @@ class _ImageTestCase(object):
|
|||
self.mox.VerifyAll()
|
||||
|
||||
def test_cache_image_exists(self):
|
||||
self.mox.StubOutWithMock(imagecache, 'refresh_timestamp')
|
||||
self.mox.StubOutWithMock(os.path, 'exists')
|
||||
if self.OLD_STYLE_INSTANCE_PATH:
|
||||
os.path.exists(self.OLD_STYLE_INSTANCE_PATH).AndReturn(False)
|
||||
os.path.exists(self.TEMPLATE_DIR).AndReturn(True)
|
||||
os.path.exists(self.PATH).AndReturn(True)
|
||||
os.path.exists(self.TEMPLATE_PATH).AndReturn(True)
|
||||
imagecache.refresh_timestamp(self.TEMPLATE_PATH)
|
||||
self.mox.ReplayAll()
|
||||
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
|
@ -97,11 +102,13 @@ class _ImageTestCase(object):
|
|||
self.mox.VerifyAll()
|
||||
|
||||
def test_cache_base_dir_exists(self):
|
||||
self.mox.StubOutWithMock(imagecache, 'refresh_timestamp')
|
||||
self.mox.StubOutWithMock(os.path, 'exists')
|
||||
if self.OLD_STYLE_INSTANCE_PATH:
|
||||
os.path.exists(self.OLD_STYLE_INSTANCE_PATH).AndReturn(False)
|
||||
os.path.exists(self.TEMPLATE_DIR).AndReturn(True)
|
||||
os.path.exists(self.PATH).AndReturn(False)
|
||||
imagecache.refresh_timestamp(self.TEMPLATE_PATH)
|
||||
fn = self.mox.CreateMockAnything()
|
||||
fn(target=self.TEMPLATE_PATH)
|
||||
self.mox.StubOutWithMock(imagebackend.fileutils, 'ensure_tree')
|
||||
|
@ -114,11 +121,13 @@ class _ImageTestCase(object):
|
|||
self.mox.VerifyAll()
|
||||
|
||||
def test_cache_template_exists(self):
|
||||
self.mox.StubOutWithMock(imagecache, 'refresh_timestamp')
|
||||
self.mox.StubOutWithMock(os.path, 'exists')
|
||||
if self.OLD_STYLE_INSTANCE_PATH:
|
||||
os.path.exists(self.OLD_STYLE_INSTANCE_PATH).AndReturn(False)
|
||||
os.path.exists(self.TEMPLATE_DIR).AndReturn(True)
|
||||
os.path.exists(self.PATH).AndReturn(False)
|
||||
imagecache.refresh_timestamp(self.TEMPLATE_PATH)
|
||||
fn = self.mox.CreateMockAnything()
|
||||
fn(target=self.TEMPLATE_PATH)
|
||||
self.mox.ReplayAll()
|
||||
|
@ -141,6 +150,7 @@ class _ImageTestCase(object):
|
|||
|
||||
self.stubs.Set(os.path, 'exists', lambda _: True)
|
||||
self.stubs.Set(os, 'access', lambda p, w: True)
|
||||
self.stubs.Set(imagecache, 'refresh_timestamp', lambda _: None)
|
||||
|
||||
# Call twice to verify testing fallocate is only called once.
|
||||
image.cache(fake_fetch, self.TEMPLATE_PATH, self.SIZE)
|
||||
|
@ -165,6 +175,7 @@ class _ImageTestCase(object):
|
|||
self.stubs.Set(image, '_can_fallocate', lambda: True)
|
||||
self.stubs.Set(os.path, 'exists', lambda _: True)
|
||||
self.stubs.Set(os, 'access', lambda p, w: False)
|
||||
self.stubs.Set(imagecache, 'refresh_timestamp', lambda _: None)
|
||||
|
||||
# Testing fallocate is only called when user has write access.
|
||||
image.cache(fake_fetch, self.TEMPLATE_PATH, self.SIZE)
|
||||
|
@ -502,6 +513,7 @@ class LvmTestCase(_ImageTestCase, test.NoDBTestCase):
|
|||
|
||||
self.stubs.Set(os.path, 'exists', lambda _: True)
|
||||
self.stubs.Set(image, 'check_image_exists', lambda: True)
|
||||
self.stubs.Set(imagecache, 'refresh_timestamp', lambda _: None)
|
||||
|
||||
image.cache(fake_fetch, self.TEMPLATE_PATH, self.SIZE)
|
||||
|
||||
|
@ -537,8 +549,10 @@ class RbdTestCase(_ImageTestCase, test.NoDBTestCase):
|
|||
|
||||
self.mox.StubOutWithMock(os.path, 'exists')
|
||||
self.mox.StubOutWithMock(image, 'check_image_exists')
|
||||
self.mox.StubOutWithMock(imagecache, 'refresh_timestamp')
|
||||
os.path.exists(self.TEMPLATE_DIR).AndReturn(False)
|
||||
image.check_image_exists().AndReturn(False)
|
||||
imagecache.refresh_timestamp(self.TEMPLATE_PATH)
|
||||
fn = self.mox.CreateMockAnything()
|
||||
fn(target=self.TEMPLATE_PATH)
|
||||
self.mox.StubOutWithMock(imagebackend.fileutils, 'ensure_tree')
|
||||
|
@ -555,8 +569,10 @@ class RbdTestCase(_ImageTestCase, test.NoDBTestCase):
|
|||
|
||||
self.mox.StubOutWithMock(os.path, 'exists')
|
||||
self.mox.StubOutWithMock(image, 'check_image_exists')
|
||||
self.mox.StubOutWithMock(imagecache, 'refresh_timestamp')
|
||||
os.path.exists(self.TEMPLATE_DIR).AndReturn(True)
|
||||
image.check_image_exists().AndReturn(False)
|
||||
imagecache.refresh_timestamp(self.TEMPLATE_PATH)
|
||||
fn = self.mox.CreateMockAnything()
|
||||
fn(target=self.TEMPLATE_PATH)
|
||||
self.mox.StubOutWithMock(imagebackend.fileutils, 'ensure_tree')
|
||||
|
@ -572,9 +588,11 @@ class RbdTestCase(_ImageTestCase, test.NoDBTestCase):
|
|||
|
||||
self.mox.StubOutWithMock(os.path, 'exists')
|
||||
self.mox.StubOutWithMock(image, 'check_image_exists')
|
||||
self.mox.StubOutWithMock(imagecache, 'refresh_timestamp')
|
||||
os.path.exists(self.TEMPLATE_DIR).AndReturn(True)
|
||||
image.check_image_exists().AndReturn(True)
|
||||
os.path.exists(self.TEMPLATE_PATH).AndReturn(True)
|
||||
imagecache.refresh_timestamp(self.TEMPLATE_PATH)
|
||||
self.mox.ReplayAll()
|
||||
|
||||
image.cache(None, self.TEMPLATE)
|
||||
|
@ -586,8 +604,10 @@ class RbdTestCase(_ImageTestCase, test.NoDBTestCase):
|
|||
|
||||
self.mox.StubOutWithMock(os.path, 'exists')
|
||||
self.mox.StubOutWithMock(image, 'check_image_exists')
|
||||
self.mox.StubOutWithMock(imagecache, 'refresh_timestamp')
|
||||
os.path.exists(self.TEMPLATE_DIR).AndReturn(True)
|
||||
image.check_image_exists().AndReturn(False)
|
||||
imagecache.refresh_timestamp(self.TEMPLATE_PATH)
|
||||
fn = self.mox.CreateMockAnything()
|
||||
fn(target=self.TEMPLATE_PATH)
|
||||
self.mox.ReplayAll()
|
||||
|
@ -635,6 +655,7 @@ class RbdTestCase(_ImageTestCase, test.NoDBTestCase):
|
|||
|
||||
self.stubs.Set(os.path, 'exists', lambda _: True)
|
||||
self.stubs.Set(image, 'check_image_exists', lambda: True)
|
||||
self.stubs.Set(imagecache, 'refresh_timestamp', lambda _: None)
|
||||
|
||||
image.cache(fake_fetch, self.TEMPLATE_PATH, self.SIZE)
|
||||
|
||||
|
|
|
@ -655,6 +655,12 @@ class ImageCacheManagerTestCase(test.NoDBTestCase):
|
|||
|
||||
self.stubs.Set(os.path, 'getmtime', lambda x: getmtime(x))
|
||||
|
||||
# Fake decorator for lock
|
||||
def passthru(f):
|
||||
return f
|
||||
|
||||
self.stubs.Set(imagecache, '_lock_for_refresh', lambda _: passthru)
|
||||
|
||||
# Make sure we don't accidentally remove a real file
|
||||
orig_remove = os.remove
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ from nova import utils
|
|||
from nova.virt.disk import api as disk
|
||||
from nova.virt import images
|
||||
from nova.virt.libvirt import config as vconfig
|
||||
from nova.virt.libvirt import imagecache
|
||||
from nova.virt.libvirt import utils as libvirt_utils
|
||||
|
||||
|
||||
|
@ -181,7 +182,7 @@ class Image(object):
|
|||
if not os.path.exists(base_dir):
|
||||
fileutils.ensure_tree(base_dir)
|
||||
base = os.path.join(base_dir, filename)
|
||||
|
||||
imagecache.refresh_timestamp(base)
|
||||
if not self.check_image_exists() or not os.path.exists(base):
|
||||
self.create_image(fetch_func_sync, base, size,
|
||||
*args, **kwargs)
|
||||
|
|
|
@ -238,10 +238,33 @@ def write_stored_checksum(target):
|
|||
write_stored_info(target, field='sha1', value=_hash_file(target))
|
||||
|
||||
|
||||
def _get_lock_path():
|
||||
return os.path.join(CONF.instances_path, 'locks')
|
||||
|
||||
|
||||
def _lock_for_refresh(base_file):
|
||||
lock_name = 'refresh-%s' % os.path.split(base_file)[-1]
|
||||
return utils.synchronized(lock_name, external=True,
|
||||
lock_path=_get_lock_path())
|
||||
|
||||
|
||||
def refresh_timestamp(base_file):
|
||||
"""Update age of a file not to removed from the cache."""
|
||||
|
||||
@_lock_for_refresh(base_file)
|
||||
def inner_refresh_timestamp():
|
||||
if os.path.exists(base_file):
|
||||
virtutils.chown(base_file, os.getuid())
|
||||
os.utime(base_file, None)
|
||||
|
||||
if os.path.exists(base_file):
|
||||
inner_refresh_timestamp()
|
||||
|
||||
|
||||
class ImageCacheManager(imagecache.ImageCacheManager):
|
||||
def __init__(self):
|
||||
super(ImageCacheManager, self).__init__()
|
||||
self.lock_path = os.path.join(CONF.instances_path, 'locks')
|
||||
self.lock_path = _get_lock_path()
|
||||
self._reset_state()
|
||||
|
||||
def _reset_state(self):
|
||||
|
@ -426,33 +449,50 @@ class ImageCacheManager(imagecache.ImageCacheManager):
|
|||
|
||||
Returns nothing.
|
||||
"""
|
||||
|
||||
# TODO(arata): If the lock is already owned by other process
|
||||
# (or thread), it means the process is refreshing or removing
|
||||
# the base file. Then, this process has nothing to do after
|
||||
# the lock is acquired, because the base file is already
|
||||
# refreshed (and should not be removed) or removed.
|
||||
# So, unless the lock is immediately acquired, this method
|
||||
# should return immediately instead of blocking meaninglessly.
|
||||
|
||||
@_lock_for_refresh(base_file)
|
||||
def inner_remove_base_file():
|
||||
if not os.path.exists(base_file):
|
||||
LOG.debug(_('Cannot remove %(base_file)s, it does not exist'),
|
||||
base_file)
|
||||
return
|
||||
|
||||
mtime = os.path.getmtime(base_file)
|
||||
age = time.time() - mtime
|
||||
|
||||
maxage = CONF.libvirt.remove_unused_resized_minimum_age_seconds
|
||||
if base_file in self.originals:
|
||||
maxage = CONF.remove_unused_original_minimum_age_seconds
|
||||
|
||||
if age < maxage:
|
||||
LOG.info(_('Base file too young to remove: %s'),
|
||||
base_file)
|
||||
else:
|
||||
LOG.info(_('Removing base file: %s'), base_file)
|
||||
try:
|
||||
os.remove(base_file)
|
||||
signature = get_info_filename(base_file)
|
||||
if os.path.exists(signature):
|
||||
os.remove(signature)
|
||||
except OSError as e:
|
||||
LOG.error(_('Failed to remove %(base_file)s, '
|
||||
'error was %(error)s'),
|
||||
{'base_file': base_file,
|
||||
'error': e})
|
||||
|
||||
if not os.path.exists(base_file):
|
||||
LOG.debug(_('Cannot remove %(base_file)s, it does not exist'),
|
||||
base_file)
|
||||
return
|
||||
|
||||
mtime = os.path.getmtime(base_file)
|
||||
age = time.time() - mtime
|
||||
|
||||
maxage = CONF.libvirt.remove_unused_resized_minimum_age_seconds
|
||||
if base_file in self.originals:
|
||||
maxage = CONF.remove_unused_original_minimum_age_seconds
|
||||
|
||||
if age < maxage:
|
||||
LOG.info(_('Base file too young to remove: %s'),
|
||||
base_file)
|
||||
else:
|
||||
LOG.info(_('Removing base file: %s'), base_file)
|
||||
try:
|
||||
os.remove(base_file)
|
||||
signature = get_info_filename(base_file)
|
||||
if os.path.exists(signature):
|
||||
os.remove(signature)
|
||||
except OSError as e:
|
||||
LOG.error(_('Failed to remove %(base_file)s, '
|
||||
'error was %(error)s'),
|
||||
{'base_file': base_file,
|
||||
'error': e})
|
||||
inner_remove_base_file()
|
||||
|
||||
def _handle_base_image(self, img_id, base_file):
|
||||
"""Handle the checks for a single base image."""
|
||||
|
@ -519,9 +559,9 @@ class ImageCacheManager(imagecache.ImageCacheManager):
|
|||
'use'),
|
||||
{'id': img_id,
|
||||
'base_file': base_file})
|
||||
if os.path.exists(base_file):
|
||||
virtutils.chown(base_file, os.getuid())
|
||||
os.utime(base_file, None)
|
||||
# TODO(arata): Should not block to acquire the lock here
|
||||
# as well in _remove_base_file()
|
||||
refresh_timestamp(base_file)
|
||||
|
||||
def _age_and_verify_cached_images(self, context, all_instances, base_dir):
|
||||
LOG.debug(_('Verify base images'))
|
||||
|
|
Loading…
Reference in New Issue