Enable cloning for rbd-backed ephemeral disks

Currently when using rbd as an image backend, nova downloads the glance
image to local disk and then copies it again into rbd. This can be very
slow for large images, and wastes bandwidth as well as disk space.

When the glance image is stored in the same ceph cluster, the data is
being pulled out and pushed back in unnecessarily. Instead, create a
copy-on-write clone of the image. This is fast, and does not depend on
the size of the image. Instead of taking minutes, booting takes seconds,
and is not limited by the disk copy.

Add some rbd utility functions from cinder to support cloning and let
the rbd imagebackend rely on librbd instead of the rbd command line tool
for checking image existence.

Add a new clone() method to the image backend, so that backends like rbd
can make optimizations like this. Try to use clone() for the root disk
when it comes from an image, but fall back to fetch_to_raw() if clone()
fails.

Instead of calling disk.get_disk_size() directly from
verify_base_size(), which assumes the disk is stored locally, add a new
method that is overridden by the Rbd subclass to get the disk size.

DocImpact

Implements: blueprint rbd-clone-image-handler
Closes-Bug: 1226351
Co-Authored-By: Josh Durgin <josh.durgin@inktank.com>
Signed-Off-By: Josh Durgin <josh.durgin@inktank.com>
Signed-Off-By: Zhi Yan Liu <zhiyanl@cn.ibm.com>
Signed-Off-By: Dmitry Borodaenko <dborodaenko@mirantis.com>
Change-Id: I0f50659b54a92fc21086990be8925ea15008569a
This commit is contained in:
Dmitry Borodaenko 2014-05-19 16:30:14 -07:00
parent 155eeabbfa
commit 112b140e2d
5 changed files with 330 additions and 33 deletions

View File

@ -380,14 +380,14 @@ class Qcow2TestCase(_ImageTestCase, test.NoDBTestCase):
def test_create_image_too_small(self):
fn = self.prepare_mocks()
self.mox.StubOutWithMock(os.path, 'exists')
self.mox.StubOutWithMock(imagebackend.disk, 'get_disk_size')
self.mox.StubOutWithMock(imagebackend.Qcow2, 'get_disk_size')
if self.OLD_STYLE_INSTANCE_PATH:
os.path.exists(self.OLD_STYLE_INSTANCE_PATH).AndReturn(False)
os.path.exists(self.DISK_INFO_PATH).AndReturn(False)
os.path.exists(self.INSTANCES_PATH).AndReturn(True)
os.path.exists(self.TEMPLATE_PATH).AndReturn(True)
imagebackend.disk.get_disk_size(self.TEMPLATE_PATH
).AndReturn(self.SIZE)
imagebackend.Qcow2.get_disk_size(self.TEMPLATE_PATH
).AndReturn(self.SIZE)
self.mox.ReplayAll()
image = self.image_class(self.INSTANCE, self.NAME)
@ -693,6 +693,7 @@ class RbdTestCase(_ImageTestCase, test.NoDBTestCase):
self.mox.VerifyAll()
def test_cache_base_dir_exists(self):
fn = self.mox.CreateMockAnything()
image = self.image_class(self.INSTANCE, self.NAME)
self.mox.StubOutWithMock(os.path, 'exists')
@ -748,22 +749,72 @@ class RbdTestCase(_ImageTestCase, test.NoDBTestCase):
fake_processutils.fake_execute_clear_log()
fake_processutils.stub_out_processutils_execute(self.stubs)
self.mox.StubOutWithMock(imagebackend.disk, 'get_disk_size')
imagebackend.disk.get_disk_size(self.TEMPLATE_PATH
).AndReturn(self.SIZE)
image = self.image_class(self.INSTANCE, self.NAME)
self.mox.StubOutWithMock(image, 'check_image_exists')
image.check_image_exists().AndReturn(False)
image.check_image_exists().AndReturn(False)
self.mox.ReplayAll()
image.create_image(fn, self.TEMPLATE_PATH, None)
rbd_name = "%s_%s" % (self.INSTANCE['uuid'], self.NAME)
cmd = ('rbd', 'import', '--pool', self.POOL, self.TEMPLATE_PATH,
rbd_name, '--new-format', '--id', self.USER,
'--conf', self.CONF)
self.mox.ReplayAll()
self.assertEqual(fake_processutils.fake_execute_get_log(),
[' '.join(cmd)])
self.mox.VerifyAll()
def test_create_image_resize(self):
fn = self.mox.CreateMockAnything()
full_size = self.SIZE * 2
fn(max_size=full_size, target=self.TEMPLATE_PATH)
rbd.rbd.RBD_FEATURE_LAYERING = 1
fake_processutils.fake_execute_clear_log()
fake_processutils.stub_out_processutils_execute(self.stubs)
image = self.image_class(self.INSTANCE, self.NAME)
image.create_image(fn, self.TEMPLATE_PATH, None)
self.mox.StubOutWithMock(image, 'check_image_exists')
image.check_image_exists().AndReturn(False)
image.check_image_exists().AndReturn(False)
rbd_name = "%s_%s" % (self.INSTANCE['uuid'], self.NAME)
cmd = ('rbd', 'import', '--pool', self.POOL, self.TEMPLATE_PATH,
rbd_name, '--new-format', '--id', self.USER,
'--conf', self.CONF)
self.mox.StubOutWithMock(image, 'get_disk_size')
image.get_disk_size(rbd_name).AndReturn(self.SIZE)
self.mox.StubOutWithMock(image.driver, 'resize')
image.driver.resize(rbd_name, full_size)
self.mox.ReplayAll()
image.create_image(fn, self.TEMPLATE_PATH, full_size)
self.assertEqual(fake_processutils.fake_execute_get_log(),
[' '.join(cmd)])
self.mox.VerifyAll()
def test_create_image_already_exists(self):
    """Exercise create_image() when the rbd image is reported as present.

    check_image_exists() is stubbed to return True on both calls, and
    get_disk_size() is stubbed to return self.SIZE for both the template
    path and the rbd image name.
    """
    rbd.rbd.RBD_FEATURE_LAYERING = 1
    rbd_name = "%s_%s" % (self.INSTANCE['uuid'], self.NAME)

    image = self.image_class(self.INSTANCE, self.NAME)
    self.mox.StubOutWithMock(image, 'check_image_exists')
    image.check_image_exists().AndReturn(True)
    self.mox.StubOutWithMock(image, 'get_disk_size')
    # Expectations are recorded in the order create_image() is expected
    # to make the calls.
    image.get_disk_size(self.TEMPLATE_PATH).AndReturn(self.SIZE)
    image.check_image_exists().AndReturn(True)
    image.get_disk_size(rbd_name).AndReturn(self.SIZE)

    self.mox.ReplayAll()

    prepare_template = self.mox.CreateMockAnything()
    image.create_image(prepare_template, self.TEMPLATE_PATH, self.SIZE)

    self.mox.VerifyAll()
def test_prealloc_image(self):
CONF.set_override('preallocate_images', 'space')

View File

@ -13,6 +13,7 @@
import mock
from nova import exception
from nova.openstack.common import log as logging
from nova import test
from nova import utils
@ -81,6 +82,70 @@ class RbdTestCase(test.NoDBTestCase):
# Per-test cleanup: only delegates to the parent class's tearDown().
def tearDown(self):
super(RbdTestCase, self).tearDown()
def test_good_locations(self):
    """parse_url() accepts well-formed rbd:// snapshot URLs.

    The test passes when no call raises.
    """
    locations = ['rbd://fsid/pool/image/snap',
                 'rbd://%2F/%2F/%2F/%2F', ]
    # An explicit loop is used instead of map(): map() is lazy on
    # Python 3, so using it only for side effects would silently skip
    # every parse_url() call and the test would check nothing.
    for loc in locations:
        self.driver.parse_url(loc)
def test_bad_locations(self):
    """Malformed or non-rbd URLs fail to parse and are not cloneable."""
    bad_urls = [
        'rbd://image',
        'http://path/to/somewhere/else',
        'rbd://image/extra',
        'rbd://image/',
        'rbd://fsid/pool/image/',
        'rbd://fsid/pool/image/snap/',
        'rbd://///',
    ]
    for bad_url in bad_urls:
        self.assertRaises(exception.ImageUnacceptable,
                          self.driver.parse_url, bad_url)
        cloneable = self.driver.is_cloneable({'url': bad_url},
                                             {'disk_format': 'raw'})
        self.assertFalse(cloneable)
@mock.patch.object(rbd.RBDDriver, '_get_fsid')
@mock.patch.object(rbd, 'rbd')
@mock.patch.object(rbd, 'rados')
def test_cloneable(self, mock_rados, mock_rbd, mock_get_fsid):
    """A raw image whose URL fsid matches the cluster fsid is cloneable.

    The rbd and rados modules are patched out for the duration of the
    test.
    """
    mock_get_fsid.return_value = 'abc'
    cloneable = self.driver.is_cloneable(
        {'url': 'rbd://abc/pool/image/snap'},
        {'disk_format': 'raw'})
    self.assertTrue(cloneable)
    self.assertTrue(mock_get_fsid.called)
@mock.patch.object(rbd.RBDDriver, '_get_fsid')
def test_uncloneable_different_fsid(self, mock_get_fsid):
    """A location whose URL fsid differs from the cluster's is rejected."""
    # The cluster reports 'abc' while the URL names 'def'.
    mock_get_fsid.return_value = 'abc'
    cloneable = self.driver.is_cloneable(
        {'url': 'rbd://def/pool/image/snap'},
        {'disk_format': 'raw'})
    self.assertFalse(cloneable)
    self.assertTrue(mock_get_fsid.called)
@mock.patch.object(rbd.RBDDriver, '_get_fsid')
@mock.patch.object(rbd, 'RBDVolumeProxy')
@mock.patch.object(rbd, 'rbd')
@mock.patch.object(rbd, 'rados')
def test_uncloneable_unreadable(self, mock_rados, mock_rbd, mock_proxy,
                                mock_get_fsid):
    """is_cloneable() is False when opening the snapshot fails.

    RBDVolumeProxy is patched and its side effect is set to the patched
    rbd module's Error attribute.
    """
    mock_get_fsid.return_value = 'abc'
    mock_proxy.side_effect = mock_rbd.Error
    image_location = {'url': 'rbd://abc/pool/image/snap'}

    cloneable = self.driver.is_cloneable(image_location,
                                         {'disk_format': 'raw'})

    self.assertFalse(cloneable)
    # The image must have been opened read-only at the named snapshot.
    mock_proxy.assert_called_once_with(self.driver, 'image', pool='pool',
                                       snapshot='snap', read_only=True)
    self.assertTrue(mock_get_fsid.called)
@mock.patch.object(rbd.RBDDriver, '_get_fsid')
def test_uncloneable_bad_format(self, mock_get_fsid):
    """Images whose disk_format is not 'raw' are never cloneable."""
    mock_get_fsid.return_value = 'abc'
    image_location = {'url': 'rbd://abc/pool/image/snap'}
    for disk_format in ['qcow2', 'vmdk', 'vdi']:
        cloneable = self.driver.is_cloneable(
            image_location, {'disk_format': disk_format})
        self.assertFalse(cloneable)
    self.assertTrue(mock_get_fsid.called)
@mock.patch.object(utils, 'execute')
def test_get_mon_addrs(self, mock_execute):
mock_execute.return_value = (CEPH_MON_DUMP, '')
@ -88,6 +153,37 @@ class RbdTestCase(test.NoDBTestCase):
ports = ['6789', '6790', '6791', '6792', '6791']
self.assertEqual((hosts, ports), self.driver.get_mon_addrs())
@mock.patch.object(rbd, 'RADOSClient')
@mock.patch.object(rbd, 'rbd')
@mock.patch.object(rbd, 'rados')
def test_clone(self, mock_rados, mock_rbd, mock_client):
    """clone() calls RBD().clone() with the source and destination ioctx.

    RADOSClient is patched; every time the patched client is entered as a
    context manager it is recorded, so the test can check that two
    clients were entered and that their ioctx objects were passed to
    RBD().clone().
    """
    pool = u'images'
    image = u'image-name'
    snap = u'snapshot-name'
    location = {'url': u'rbd://fsid/%s/%s/%s' % (pool, image, snap)}

    client = mock_client.return_value
    entered_clients = []

    def _record_enter():
        # Capture both rados clients used to perform the clone.
        entered_clients.append(client)
        return client

    client.__enter__.side_effect = _record_enter
    rbd_api = mock_rbd.RBD.return_value

    self.driver.clone(location, self.volume_name)

    expected_args = [entered_clients[0].ioctx, str(image), str(snap),
                     entered_clients[1].ioctx, str(self.volume_name)]
    expected_kwargs = {'features': mock_rbd.RBD_FEATURE_LAYERING}
    rbd_api.clone.assert_called_once_with(*expected_args,
                                          **expected_kwargs)
    self.assertEqual(client.__enter__.call_count, 2)
@mock.patch.object(rbd, 'RBDVolumeProxy')
def test_resize(self, mock_proxy):
size = 1024
@ -163,8 +259,11 @@ class RbdTestCase(test.NoDBTestCase):
@mock.patch.object(rbd, 'RBDVolumeProxy')
def test_exists(self, mock_proxy):
snapshot = 'snap'
proxy = mock_proxy.return_value
self.assertTrue(self.driver.exists(self.volume_name))
self.assertTrue(self.driver.exists(self.volume_name,
self.rbd_pool,
snapshot))
proxy.__enter__.assert_called_once_with()
proxy.__exit__.assert_called_once_with(None, None, None)

View File

@ -2684,13 +2684,23 @@ class LibvirtDriver(driver.ComputeDriver):
if size == 0 or suffix == '.rescue':
size = None
image('disk').cache(fetch_func=libvirt_utils.fetch_image,
context=context,
filename=root_fname,
size=size,
image_id=disk_images['image_id'],
user_id=instance['user_id'],
project_id=instance['project_id'])
backend = image('disk')
if backend.SUPPORTS_CLONE:
def clone_fallback_to_fetch(*args, **kwargs):
try:
backend.clone(context, disk_images['image_id'])
except exception.ImageUnacceptable:
libvirt_utils.fetch_image(*args, **kwargs)
fetch_func = clone_fallback_to_fetch
else:
fetch_func = libvirt_utils.fetch_image
backend.cache(fetch_func=fetch_func,
context=context,
filename=root_fname,
size=size,
image_id=disk_images['image_id'],
user_id=instance['user_id'],
project_id=instance['project_id'])
# Lookup the filesystem type if required
os_type_with_default = disk.get_fs_type_for_os_type(

View File

@ -23,6 +23,7 @@ import six
from nova import exception
from nova.i18n import _
from nova.i18n import _LE
from nova import image
from nova.openstack.common import excutils
from nova.openstack.common import fileutils
from nova.openstack.common import jsonutils
@ -72,11 +73,14 @@ CONF.import_opt('rbd_user', 'nova.virt.libvirt.volume', group='libvirt')
CONF.import_opt('rbd_secret_uuid', 'nova.virt.libvirt.volume', group='libvirt')
LOG = logging.getLogger(__name__)
IMAGE_API = image.API()
@six.add_metaclass(abc.ABCMeta)
class Image(object):
SUPPORTS_CLONE = False
def __init__(self, source_type, driver_format, is_block_dev=False):
"""Image initialization.
@ -197,8 +201,7 @@ class Image(object):
'path': self.path})
return can_fallocate
@staticmethod
def verify_base_size(base, size, base_size=0):
def verify_base_size(self, base, size, base_size=0):
"""Check that the base image is not larger than size.
Since images can't be generally shrunk, enforce this
constraint taking account of virtual image size.
@ -217,7 +220,7 @@ class Image(object):
return
if size and not base_size:
base_size = disk.get_disk_size(base)
base_size = self.get_disk_size(base)
if size < base_size:
msg = _LE('%(base)s virtual size %(base_size)s '
@ -227,6 +230,9 @@ class Image(object):
'size': size})
raise exception.FlavorDiskTooSmall()
def get_disk_size(self, name):
    """Return the size of the disk identified by ``name``.

    Delegates to ``disk.get_disk_size``. verify_base_size() compares the
    result against the requested size, so the value must be returned
    rather than discarded; without the return the method always yields
    None.

    :param name: identifier of the disk, passed through unchanged to
                 ``disk.get_disk_size``
    :returns: whatever ``disk.get_disk_size(name)`` returns
    """
    return disk.get_disk_size(name)
def snapshot_extract(self, target, out_format):
raise NotImplementedError()
@ -295,6 +301,21 @@ class Image(object):
"""True if the backend puts images on a shared block storage."""
return False
def clone(self, context, image_id_or_uri):
    """Clone an image.

    Cloning is backend-dependent. A backend may ask the image API for
    the image's "locations" and pick one or more of them to clone from.
    This base implementation does not support cloning and always raises.

    :param image_id_or_uri: The ID or URI of an image to clone.

    :raises: exception.ImageUnacceptable if it cannot be cloned
    """
    raise exception.ImageUnacceptable(
        image_id=image_id_or_uri,
        reason=_('clone() is not implemented'))
class Raw(Image):
def __init__(self, instance=None, disk_name=None, path=None):
@ -483,6 +504,9 @@ class Lvm(Image):
class Rbd(Image):
SUPPORTS_CLONE = True
def __init__(self, instance=None, disk_name=None, path=None, **kwargs):
super(Rbd, self).__init__("block", "rbd", is_block_dev=True)
if path:
@ -525,7 +549,7 @@ class Rbd(Image):
info = vconfig.LibvirtConfigGuestDisk()
hosts, ports = self.driver.get_mon_addrs()
info.device_type = device_type
info.source_device = device_type
info.driver_format = 'raw'
info.driver_cache = cache_mode
info.target_bus = disk_bus
@ -552,16 +576,27 @@ class Rbd(Image):
def check_image_exists(self):
    """Return the driver's answer on whether this rbd image exists."""
    image_present = self.driver.exists(self.rbd_name)
    return image_present
def get_disk_size(self, name):
    """Returns the size of the virtual disk in bytes.

    ``name`` is deliberately unused: this backend always reports the size
    of its own rbd image, and callers may hand in a local file path that
    does not exist.
    """
    own_image = self.rbd_name
    return self.driver.size(own_image)
def create_image(self, prepare_template, base, size, *args, **kwargs):
if not os.path.exists(base):
if not self.check_image_exists():
prepare_template(target=base, max_size=size, *args, **kwargs)
else:
self.verify_base_size(base, size)
self.driver.import_image(base, self.rbd_name)
# prepare_template() may have cloned the image into a new rbd
# image already instead of downloading it locally
if not self.check_image_exists():
self.driver.import_image(base, self.rbd_name)
base_size = disk.get_disk_size(base)
if size and size > base_size:
if size and size > self.get_disk_size(self.rbd_name):
self.driver.resize(self.rbd_name, size)
def snapshot_extract(self, target, out_format):
@ -571,6 +606,31 @@ class Rbd(Image):
def is_shared_block_storage():
return True
def clone(self, context, image_id_or_uri):
    """Clone an image into this backend's rbd image.

    Looks the image up through the image API with its locations included
    and clones from the first location the driver reports as cloneable.

    :param context: request context handed to the image API
    :param image_id_or_uri: The ID or URI of an image to clone.
    :returns: the result of the driver's clone() call
    :raises: exception.ImageUnacceptable if the driver does not support
             layering, if the image's disk_format is neither 'raw' nor
             'iso', or if none of the image's locations is cloneable
    """
    if not self.driver.supports_layering():
        reason = _('installed version of librbd does not support cloning')
        raise exception.ImageUnacceptable(image_id=image_id_or_uri,
                                          reason=reason)

    image_meta = IMAGE_API.get(context, image_id_or_uri,
                               include_locations=True)
    locations = image_meta['locations']

    # Pass the values as logging arguments so the message is only
    # formatted when debug logging is actually enabled.
    LOG.debug('Image locations are: %(locs)s', {'locs': locations})

    if image_meta.get('disk_format') not in ['raw', 'iso']:
        reason = _('Image is not raw format')
        raise exception.ImageUnacceptable(image_id=image_id_or_uri,
                                          reason=reason)

    for location in locations:
        if self.driver.is_cloneable(location, image_meta):
            return self.driver.clone(location, self.rbd_name)

    reason = _('No image locations are accessible')
    raise exception.ImageUnacceptable(image_id=image_id_or_uri,
                                      reason=reason)
class Backend(object):
def __init__(self, use_cow):

View File

@ -14,6 +14,8 @@
# License for the specific language governing permissions and limitations
# under the License.
import urllib
try:
import rados
import rbd
@ -21,9 +23,11 @@ except ImportError:
rados = None
rbd = None
from nova import exception
from nova.i18n import _
from nova.i18n import _LE
from nova.i18n import _LW
from nova.openstack.common import excutils
from nova.openstack.common import jsonutils
from nova.openstack.common import log as logging
from nova import utils
@ -40,14 +44,23 @@ class RBDVolumeProxy(object):
The underlying librados client and ioctx can be accessed as the attributes
'client' and 'ioctx'.
"""
def __init__(self, driver, name, pool=None):
def __init__(self, driver, name, pool=None, snapshot=None,
read_only=False):
client, ioctx = driver._connect_to_rados(pool)
try:
self.volume = rbd.Image(ioctx, str(name), snapshot=None)
snap_name = snapshot.encode('utf8') if snapshot else None
self.volume = rbd.Image(ioctx, name.encode('utf8'),
snapshot=snap_name,
read_only=read_only)
except rbd.ImageNotFound:
with excutils.save_and_reraise_exception():
LOG.debug("rbd image %s does not exist", name)
driver._disconnect_from_rados(client, ioctx)
except rbd.Error:
LOG.exception(_LE("error opening rbd image %s"), name)
driver._disconnect_from_rados(client, ioctx)
raise
with excutils.save_and_reraise_exception():
LOG.exception(_LE("error opening rbd image %s"), name)
driver._disconnect_from_rados(client, ioctx)
self.driver = driver
self.client = client
self.ioctx = ioctx
@ -94,8 +107,8 @@ class RBDDriver(object):
conffile=self.ceph_conf)
try:
client.connect()
pool_to_open = str(pool or self.pool)
ioctx = client.open_ioctx(pool_to_open)
pool_to_open = pool or self.pool
ioctx = client.open_ioctx(pool_to_open.encode('utf-8'))
return client, ioctx
except rados.Error:
# shutdown cannot raise an exception
@ -139,6 +152,67 @@ class RBDDriver(object):
ports.append(port)
return hosts, ports
def parse_url(self, url):
    """Split an ``rbd://`` URL into its URL-unquoted components.

    :param url: location URL; is_cloneable() unpacks the result as
                fsid, pool, image and snapshot
    :returns: list of exactly four unquoted components
    :raises: exception.ImageUnacceptable if the URL does not start with
             'rbd://', contains an empty component, or does not have
             exactly four components
    """
    scheme = 'rbd://'
    if not url.startswith(scheme):
        raise exception.ImageUnacceptable(image_id=url,
                                          reason=_('Not stored in rbd'))

    components = [urllib.unquote(part)
                  for part in url[len(scheme):].split('/')]
    # Blank components are rejected before the count is checked.
    if '' in components:
        raise exception.ImageUnacceptable(image_id=url,
                                          reason=_('Blank components'))
    if len(components) != 4:
        raise exception.ImageUnacceptable(image_id=url,
                                          reason=_('Not an rbd snapshot'))
    return components
def _get_fsid(self):
    """Return the fsid reported by the cluster behind a RADOSClient."""
    with RADOSClient(self) as rados_client:
        cluster = rados_client.cluster
        return cluster.get_fsid()
def is_cloneable(self, image_location, image_meta):
    """Report whether an image location can be cloned by this driver.

    A location is cloneable when its URL parses as an rbd snapshot URL,
    the fsid in the URL equals the fsid returned by _get_fsid(), the
    image's disk_format is 'raw', and exists() reports the snapshot as
    present.

    :param image_location: dict with a 'url' key
    :param image_meta: dict with a 'disk_format' key
    :returns: True if the location is cloneable, False otherwise
    """
    url = image_location['url']
    try:
        fsid, pool, image, snapshot = self.parse_url(url)
    except exception.ImageUnacceptable as e:
        LOG.debug('not cloneable: %s', e)
        return False

    # All debug messages use lazy logging arguments, matching the call
    # above, so nothing is formatted unless debug logging is enabled.
    if self._get_fsid() != fsid:
        LOG.debug('%s is in a different ceph cluster', url)
        return False

    disk_format = image_meta['disk_format']
    if disk_format != 'raw':
        LOG.debug("rbd image clone requires image format to be "
                  "'raw' but image %(url)s is '%(format)s'",
                  {'url': url, 'format': disk_format})
        return False

    # check that we can read the image
    try:
        return self.exists(image, pool=pool, snapshot=snapshot)
    except rbd.Error as e:
        LOG.debug('Unable to open image %(loc)s: %(err)s',
                  {'loc': url, 'err': e})
        return False
def clone(self, image_location, dest_name):
    """Clone the rbd snapshot named by a location URL into ``dest_name``.

    One RADOSClient is opened on the source pool taken from the URL and
    a second one with the driver's defaults; the clone is created with
    the layering feature enabled.

    :param image_location: dict whose 'url' is an rbd:// URL accepted by
                           parse_url()
    :param dest_name: name of the image to create
    :raises: exception.ImageUnacceptable if the URL cannot be parsed
    """
    _fsid, pool, image, snapshot = self.parse_url(
        image_location['url'])
    # Lazy logging arguments: formatted only if debug logging is enabled.
    LOG.debug('cloning %(pool)s/%(img)s@%(snap)s',
              {'pool': pool, 'img': image, 'snap': snapshot})
    with RADOSClient(self, str(pool)) as src_client:
        with RADOSClient(self) as dest_client:
            # The pragma needs "=" to be recognised by pylint.
            # pylint: disable=E1101
            rbd.RBD().clone(src_client.ioctx,
                            image.encode('utf-8'),
                            snapshot.encode('utf-8'),
                            dest_client.ioctx,
                            dest_name,
                            features=rbd.RBD_FEATURE_LAYERING)
def size(self, name):
    """Return the size reported by the rbd image ``name``."""
    with RBDVolumeProxy(self, name) as volume:
        image_size = volume.size()
        return image_size
@ -153,9 +227,12 @@ class RBDDriver(object):
with RBDVolumeProxy(self, name) as vol:
vol.resize(size)
def exists(self, name):
def exists(self, name, pool=None, snapshot=None):
try:
with RBDVolumeProxy(self, name):
with RBDVolumeProxy(self, name,
pool=pool,
snapshot=snapshot,
read_only=True):
return True
except rbd.ImageNotFound:
return False