libvirt: Fix/implement revert-resize for RBD-backed images

* Makes a snapshot of Ceph-backed roots prior to resize
* Rolls back to snapshot on revert
* Destroys resize snapshots on image cleanup

Conflicts:
    nova/tests/unit/virt/libvirt/test_driver.py

because change I70215fb25ef25422786b96d33c91d8f1d4760a23 has not been backported to liberty

(cherry picked from commit 29476a67d4)

Closes-Bug: 1369465
Closes-Bug: 1314526
Change-Id: I328d2c41696a9c0f090f822a51ea42fac83f62ec
This commit is contained in:
Nicolas Simonds 2015-06-01 13:58:37 -07:00 committed by Tony Breeds
parent 76c488e79a
commit eb1f67c437
7 changed files with 299 additions and 3 deletions

View File

@ -23,6 +23,8 @@ disk_sizes = {}
disk_backing_files = {}
disk_type = "qcow2"
RESIZE_SNAPSHOT_NAME = libvirt_utils.RESIZE_SNAPSHOT_NAME
def create_image(disk_format, path, size):
    """Fake image creation: accept the arguments and do nothing."""

View File

@ -14,11 +14,13 @@
import mock
from oslo_log import log as logging
from nova.compute import task_states
from nova import exception
from nova import objects
from nova import test
from nova import utils
from nova.virt.libvirt.storage import rbd_utils
from nova.virt.libvirt import utils as libvirt_utils
LOG = logging.getLogger(__name__)
@ -79,6 +81,7 @@ class RbdTestCase(test.NoDBTestCase):
self.driver = rbd_utils.RBDDriver(self.rbd_pool, None, None)
self.volume_name = u'volume-00000001'
self.snap_name = u'test-snap'
def tearDown(self):
super(RbdTestCase, self).tearDown()
@ -300,7 +303,7 @@ class RbdTestCase(test.NoDBTestCase):
@mock.patch.object(rbd_utils, 'rados')
@mock.patch.object(rbd_utils, 'RADOSClient')
def test_cleanup_volumes(self, mock_client, mock_rados, mock_rbd):
instance = objects.Instance(id=1, uuid='12345')
instance = objects.Instance(id=1, uuid='12345', task_state=None)
rbd = mock_rbd.RBD.return_value
rbd.list.return_value = ['12345_test', '111_test']
@ -316,7 +319,7 @@ class RbdTestCase(test.NoDBTestCase):
@mock.patch.object(rbd_utils, 'RADOSClient')
def _test_cleanup_exception(self, exception_name,
mock_client, mock_rados, mock_rbd):
instance = objects.Instance(id=1, uuid='12345')
instance = objects.Instance(id=1, uuid='12345', task_state=None)
setattr(mock_rbd, exception_name, test.TestingException)
rbd = mock_rbd.RBD.return_value
@ -340,6 +343,51 @@ class RbdTestCase(test.NoDBTestCase):
self.assertRaises(test.TestingException,
self._test_cleanup_exception, 'DoesNotExist')
@mock.patch.object(rbd_utils, 'rbd')
@mock.patch.object(rbd_utils, 'rados')
@mock.patch.object(rbd_utils, 'RADOSClient')
@mock.patch.object(rbd_utils, 'RBDVolumeProxy')
def test_cleanup_volumes_pending_resize(self, mock_proxy, mock_client,
                                        mock_rados, mock_rbd):
    """A leftover resize snapshot is dropped and the removal retried.

    The first ``remove`` call raises the stand-in ``ImageHasSnapshots``
    error; the test then expects the resize snapshot to be removed once
    and ``remove`` to be called a second time for the same volume.
    """
    # Stand-in for the real rbd exception type on the mocked module.
    mock_rbd.ImageHasSnapshots = test.TestingException

    fake_rbd = mock_rbd.RBD.return_value
    fake_rbd.list.return_value = ['12345_test', '111_test']
    # First removal attempt fails, the retry succeeds.
    fake_rbd.remove.side_effect = [test.TestingException, None]

    vol_proxy = mock_proxy.return_value
    vol_proxy.__enter__.return_value = vol_proxy
    vol_proxy.list_snaps.return_value = [
        {'name': libvirt_utils.RESIZE_SNAPSHOT_NAME}]

    rados_client = mock_client.return_value
    instance = objects.Instance(id=1, uuid='12345', task_state=None)

    self.driver.cleanup_volumes(instance)

    expected_remove = mock.call(rados_client.ioctx, '12345_test')
    fake_rbd.remove.assert_has_calls([expected_remove, expected_remove])
    vol_proxy.remove_snap.assert_called_once_with(
        libvirt_utils.RESIZE_SNAPSHOT_NAME)
    # The RADOS client context must be entered and left exactly once.
    rados_client.__enter__.assert_called_once_with()
    rados_client.__exit__.assert_called_once_with(None, None, None)
@mock.patch.object(rbd_utils, 'rbd')
@mock.patch.object(rbd_utils, 'rados')
@mock.patch.object(rbd_utils, 'RADOSClient')
def test_cleanup_volumes_reverting_resize(self, mock_client, mock_rados,
                                          mock_rbd):
    """While reverting a resize only the ``disk.local`` volume is removed."""
    listed_volumes = ['12345_test', '111_test', '12345_test_disk.local']
    fake_rbd = mock_rbd.RBD.return_value
    fake_rbd.list.return_value = listed_volumes
    rados_client = mock_client.return_value

    instance = objects.Instance(id=1, uuid='12345',
                                task_state=task_states.RESIZE_REVERTING)
    self.driver.cleanup_volumes(instance)

    fake_rbd.remove.assert_called_once_with(rados_client.ioctx,
                                            '12345_test_disk.local')
    # The RADOS client context must be entered and left exactly once.
    rados_client.__enter__.assert_called_once_with()
    rados_client.__exit__.assert_called_once_with(None, None, None)
@mock.patch.object(rbd_utils, 'rbd')
@mock.patch.object(rbd_utils, 'rados')
@mock.patch.object(rbd_utils, 'RADOSClient')
@ -354,3 +402,33 @@ class RbdTestCase(test.NoDBTestCase):
# Make sure that we entered and exited the RADOSClient
client.__enter__.assert_called_once_with()
client.__exit__.assert_called_once_with(None, None, None)
@mock.patch.object(rbd_utils, 'RBDVolumeProxy')
def test_create_snap(self, mock_proxy):
    """create_snap forwards the snapshot name to the volume proxy."""
    vol_proxy = mock_proxy.return_value
    # Make ``with RBDVolumeProxy(...) as vol`` yield the same mock.
    vol_proxy.__enter__.return_value = vol_proxy

    self.driver.create_snap(self.volume_name, self.snap_name)

    vol_proxy.create_snap.assert_called_once_with(self.snap_name)
@mock.patch.object(rbd_utils, 'RBDVolumeProxy')
def test_remove_snap(self, mock_proxy):
    """remove_snap only removes a snapshot that the volume lists."""
    vol_proxy = mock_proxy.return_value
    # Make ``with RBDVolumeProxy(...) as vol`` yield the same mock.
    vol_proxy.__enter__.return_value = vol_proxy

    # Snapshot not listed yet: nothing may be removed.
    self.driver.remove_snap(self.volume_name, self.snap_name)
    self.assertFalse(vol_proxy.remove_snap.called)

    # Once the snapshot is listed it is removed exactly once.
    vol_proxy.list_snaps.return_value = [{'name': self.snap_name}]
    self.driver.remove_snap(self.volume_name, self.snap_name)
    vol_proxy.remove_snap.assert_called_once_with(self.snap_name)
@mock.patch.object(rbd_utils, 'RBDVolumeProxy')
def test_rollback_to_snap(self, mock_proxy):
    """rollback_to_snap raises for an unknown snapshot, else rolls back."""
    vol_proxy = mock_proxy.return_value
    # Make ``with RBDVolumeProxy(...) as vol`` yield the same mock.
    vol_proxy.__enter__.return_value = vol_proxy

    # Snapshot not listed yet: the driver must refuse to roll back.
    self.assertRaises(exception.SnapshotNotFound,
                      self.driver.rollback_to_snap,
                      self.volume_name, self.snap_name)

    # Once the snapshot is listed the rollback is issued exactly once.
    vol_proxy.list_snaps.return_value = [{'name': self.snap_name}]
    self.driver.rollback_to_snap(self.volume_name, self.snap_name)
    vol_proxy.rollback_to_snap.assert_called_once_with(self.snap_name)

View File

@ -505,6 +505,7 @@ def _create_test_instance():
'ephemeral_key_uuid': None,
'vcpu_model': None,
'host': 'fake-host',
'task_state': None,
}
@ -8364,6 +8365,22 @@ class LibvirtConnTestCase(test.NoDBTestCase):
host='fake-source-host',
receive=True)
@mock.patch.object(nova.virt.libvirt.imagebackend.Image, 'cache')
def test_create_image_resize_snap_backend(self, mock_cache):
    """_create_image snapshots the root disk when finishing a resize."""
    drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
    # A single mock plays both the backend factory and the image it returns.
    fake_backend = mock.Mock()
    fake_backend.image.return_value = fake_backend
    drvr.image_backend = fake_backend

    instance = objects.Instance(**self.test_instance)
    instance.task_state = task_states.RESIZE_FINISH
    image_meta = objects.ImageMeta.from_dict(self.test_image_meta)
    disk_mapping = blockinfo.get_disk_info(CONF.libvirt.virt_type,
                                           instance,
                                           image_meta)['mapping']

    with mock.patch.object(drvr.image_backend, 'create_snap') as mock_crt:
        drvr._create_image(self.context, instance, disk_mapping)
        mock_crt.assert_called_once_with(
            libvirt_utils.RESIZE_SNAPSHOT_NAME)
@mock.patch.object(utils, 'execute')
def test_create_ephemeral_specified_fs(self, mock_exec):
self.flags(default_ephemeral_format='ext3')
@ -12389,6 +12406,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
inst['key_data'] = 'ABCDEFG'
inst['system_metadata'] = {}
inst['metadata'] = {}
inst['task_state'] = None
inst.update(params)
@ -13045,6 +13063,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
context = 'fake_context'
instance = self._create_instance()
self.mox.StubOutWithMock(imagebackend.Backend, 'image')
self.mox.StubOutWithMock(libvirt_utils, 'get_instance_path')
self.mox.StubOutWithMock(os.path, 'exists')
self.mox.StubOutWithMock(shutil, 'rmtree')
@ -13068,6 +13087,9 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
shutil.rmtree('/fake/foo')
utils.execute('mv', '/fake/foo_resize', '/fake/foo')
imagebackend.Backend.image(mox.IgnoreArg(), 'disk').AndReturn(
fake_imagebackend.Raw())
self.mox.ReplayAll()
self.drvr.finish_revert_migration(context, instance, [])
@ -13105,6 +13127,45 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
side_effect=fake_get_guest_xml)):
drvr.finish_revert_migration('', instance, None, power_on=False)
def test_finish_revert_migration_snap_backend(self):
    """Reverting rolls back to the resize snapshot, then removes it."""
    drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
    # A single mock plays both the backend factory and the image it returns.
    fake_backend = mock.Mock()
    fake_backend.image.return_value = fake_backend
    drvr.image_backend = fake_backend
    ins_ref = self._create_instance()

    with test.nested(
            mock.patch.object(utils, 'get_image_from_system_metadata'),
            mock.patch.object(drvr, '_create_domain_and_network'),
            mock.patch.object(drvr, '_get_guest_xml')) as (
            mock_image, mock_cdn, mock_ggx):
        mock_image.return_value = {'disk_format': 'raw'}
        drvr.finish_revert_migration('', ins_ref, None, power_on=False)

        snap_name = libvirt_utils.RESIZE_SNAPSHOT_NAME
        drvr.image_backend.rollback_to_snap.assert_called_once_with(
            snap_name)
        drvr.image_backend.remove_snap.assert_called_once_with(
            snap_name, ignore_errors=True)
def test_finish_revert_migration_snap_backend_snapshot_not_found(self):
    """Reverting still cleans up when the resize snapshot is missing.

    ``rollback_to_snap`` on the image backend is made to raise
    ``SnapshotNotFound``.  ``finish_revert_migration`` must not propagate
    the error and must still attempt to remove the resize snapshot.
    """
    drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
    drvr.image_backend = mock.Mock()
    drvr.image_backend.image.return_value = drvr.image_backend
    # Inject the failure on the object the driver actually calls.
    # image_backend is already a Mock, so a side effect configured on a
    # patched rbd_utils.RBDDriver class would never be triggered and the
    # SnapshotNotFound branch would go untested.
    drvr.image_backend.rollback_to_snap.side_effect = (
        exception.SnapshotNotFound(snapshot_id='testing'))
    ins_ref = self._create_instance()

    with test.nested(
            mock.patch.object(utils, 'get_image_from_system_metadata'),
            mock.patch.object(drvr, '_create_domain_and_network'),
            mock.patch.object(drvr, '_get_guest_xml')) as (
            mock_image, mock_cdn, mock_ggx):
        mock_image.return_value = {'disk_format': 'raw'}
        drvr.finish_revert_migration('', ins_ref, None, power_on=False)

        # The rollback was attempted (and failed) ...
        drvr.image_backend.rollback_to_snap.assert_called_once_with(
            libvirt_utils.RESIZE_SNAPSHOT_NAME)
        # ... and the snapshot cleanup still ran afterwards.
        drvr.image_backend.remove_snap.assert_called_once_with(
            libvirt_utils.RESIZE_SNAPSHOT_NAME, ignore_errors=True)
def test_cleanup_failed_migration(self):
self.mox.StubOutWithMock(shutil, 'rmtree')
shutil.rmtree('/fake/inst')
@ -13131,6 +13192,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
self.stubs.Set(os.path, 'exists', fake_os_path_exists)
self.mox.StubOutWithMock(imagebackend.Backend, 'image')
self.mox.StubOutWithMock(libvirt_utils, 'get_instance_path')
self.mox.StubOutWithMock(utils, 'execute')
@ -13138,6 +13200,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
forceold=True).AndReturn('/fake/inst')
utils.execute('rm', '-rf', '/fake/inst_resize', delay_on_retry=True,
attempts=5)
imagebackend.Backend.image(ins_ref, 'disk').AndReturn(
fake_imagebackend.Raw())
self.mox.ReplayAll()
self.drvr._cleanup_resize(ins_ref,
@ -13168,6 +13232,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
self.stubs.Set(self.drvr.firewall_driver,
'unfilter_instance', fake_unfilter_instance)
self.mox.StubOutWithMock(imagebackend.Backend, 'image')
self.mox.StubOutWithMock(libvirt_utils, 'get_instance_path')
self.mox.StubOutWithMock(utils, 'execute')
@ -13175,11 +13240,36 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
forceold=True).AndReturn('/fake/inst')
utils.execute('rm', '-rf', '/fake/inst_resize', delay_on_retry=True,
attempts=5)
imagebackend.Backend.image(ins_ref, 'disk').AndReturn(
fake_imagebackend.Raw())
self.mox.ReplayAll()
self.drvr._cleanup_resize(ins_ref,
_fake_network_info(self.stubs, 1))
def test_cleanup_resize_snap_backend(self):
    """_cleanup_resize removes the resize directory and the snapshot."""
    CONF.set_override('policy_dirs', [])
    ins_ref = self._create_instance({'host': CONF.host})
    drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
    # A single mock plays both the backend factory and the image it returns.
    fake_backend = mock.Mock()
    fake_backend.image.return_value = fake_backend
    drvr.image_backend = fake_backend

    with test.nested(
            mock.patch.object(os.path, 'exists', return_value=True),
            mock.patch.object(libvirt_utils, 'get_instance_path',
                              return_value='/fake/inst'),
            mock.patch.object(utils, 'execute'),
            mock.patch.object(drvr.image_backend, 'remove_snap')) as (
            mock_exists, mock_get_path, mock_exec, mock_remove):
        drvr._cleanup_resize(ins_ref, _fake_network_info(self.stubs, 1))

        mock_get_path.assert_called_once_with(ins_ref, forceold=True)
        mock_exec.assert_called_once_with('rm', '-rf', '/fake/inst_resize',
                                          delay_on_retry=True, attempts=5)
        mock_remove.assert_called_once_with(
            libvirt_utils.RESIZE_SNAPSHOT_NAME, ignore_errors=True)
def test_get_instance_disk_info_exception(self):
instance = self._create_instance()

View File

@ -1030,6 +1030,17 @@ class LibvirtDriver(driver.ComputeDriver):
utils.execute('rm', '-rf', target, delay_on_retry=True,
attempts=5)
backend = self.image_backend.image(instance, 'disk')
# TODO(nic): Set ignore_errors=False in a future release.
# It is set to True here to avoid any upgrade issues surrounding
# instances being in pending resize state when the software is updated;
# in that case there will be no snapshot to remove. Once it can be
# reasonably assumed that no such instances exist in the wild
# anymore, it should be set back to False (the default) so it will
# throw errors, like it should.
backend.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME,
ignore_errors=True)
if instance.host != CONF.host:
self._undefine_domain(instance)
self.unplug_vifs(instance, network_info)
@ -2918,6 +2929,8 @@ class LibvirtDriver(driver.ComputeDriver):
size = None
backend = image('disk')
if instance.task_state == task_states.RESIZE_FINISH:
backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
if backend.SUPPORTS_CLONE:
def clone_fallback_to_fetch(*args, **kwargs):
try:
@ -6967,6 +6980,25 @@ class LibvirtDriver(driver.ComputeDriver):
image_meta = objects.ImageMeta.from_instance(instance)
backend = self.image_backend.image(instance, 'disk')
# Once we rollback, the snapshot is no longer needed, so remove it
# TODO(nic): Remove the try/except/finally in a future release
# To avoid any upgrade issues surrounding instances being in pending
# resize state when the software is updated, this portion of the
# method logs exceptions rather than failing on them. Once it can be
# reasonably assumed that no such instances exist in the wild
# anymore, the try/except/finally should be removed,
# and ignore_errors should be set back to False (the default) so
# that problems throw errors, like they should.
try:
backend.rollback_to_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
except exception.SnapshotNotFound:
LOG.warning(_LW("Failed to rollback snapshot (%s)"),
libvirt_utils.RESIZE_SNAPSHOT_NAME)
finally:
backend.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME,
ignore_errors=True)
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance,
image_meta,

View File

@ -408,6 +408,31 @@ class Image(object):
# we should talk about if we want this functionality for everything.
pass
def create_snap(self, name):
    """Create a snapshot of this image.

    This default implementation does nothing; it is the behaviour for
    backends that do not support snapshots.

    :param name: name of the snapshot
    """
def remove_snap(self, name, ignore_errors=False):
    """Remove a snapshot of this image.

    This default implementation does nothing; it is the behaviour for
    backends that do not support snapshots.

    :param name: name of the snapshot
    :param ignore_errors: don't log errors if the snapshot does not exist
    """
def rollback_to_snap(self, name):
    """Roll this image back to the named snapshot.

    This default implementation does nothing; it is the behaviour for
    backends that do not support snapshots.

    :param name: name of the snapshot
    """
class Raw(Image):
def __init__(self, instance=None, disk_name=None, path=None):
@ -843,6 +868,15 @@ class Rbd(Image):
self.driver.remove_image(name)
self.driver.import_image(local_file, name)
def create_snap(self, name):
    """Create snapshot ``name`` on this image's RBD volume.

    Delegates to the RBD driver and returns whatever it returns.

    :param name: name of the snapshot
    """
    rbd_driver = self.driver
    return rbd_driver.create_snap(self.rbd_name, name)
def remove_snap(self, name, ignore_errors=False):
    """Remove snapshot ``name`` from this image's RBD volume.

    Delegates to the RBD driver and returns whatever it returns.

    :param name: name of the snapshot
    :param ignore_errors: forwarded unchanged to the RBD driver
    """
    rbd_driver = self.driver
    return rbd_driver.remove_snap(self.rbd_name, name, ignore_errors)
def rollback_to_snap(self, name):
    """Roll this image's RBD volume back to snapshot ``name``.

    Delegates to the RBD driver and returns whatever it returns.

    :param name: name of the snapshot
    """
    rbd_driver = self.driver
    return rbd_driver.rollback_to_snap(self.rbd_name, name)
class Ploop(Image):
def __init__(self, instance=None, disk_name=None, path=None):

View File

@ -16,6 +16,8 @@
import urllib
from eventlet import tpool
try:
import rados
import rbd
@ -29,11 +31,13 @@ from oslo_service import loopingcall
from oslo_utils import excutils
from oslo_utils import units
from nova.compute import task_states
from nova import exception
from nova.i18n import _
from nova.i18n import _LE
from nova.i18n import _LW
from nova import utils
from nova.virt.libvirt import utils as libvirt_utils
LOG = logging.getLogger(__name__)
@ -282,6 +286,9 @@ class RBDDriver(object):
try:
rbd.RBD().remove(client.ioctx, volume)
raise loopingcall.LoopingCallDone(retvalue=False)
except rbd.ImageHasSnapshots:
self.remove_snap(volume, libvirt_utils.RESIZE_SNAPSHOT_NAME,
ignore_errors=True)
except (rbd.ImageBusy, rbd.ImageHasSnapshots):
LOG.warn(_LW('rbd remove %(volume)s in pool %(pool)s '
'failed'),
@ -293,7 +300,15 @@ class RBDDriver(object):
with RADOSClient(self, self.pool) as client:
def belongs_to_instance(disk):
return disk.startswith(instance.uuid)
# NOTE(nic): On revert_resize, the cleanup steps for the root
# volume are handled with an "rbd snap rollback" command,
# and none of this is needed (and is, in fact, harmful) so
# filter out non-ephemerals from the list
if instance.task_state == task_states.RESIZE_REVERTING:
return (disk.startswith(instance.uuid) and
disk.endswith('disk.local'))
else:
return disk.startswith(instance.uuid)
volumes = rbd.RBD().list(client.ioctx)
for volume in filter(belongs_to_instance, volumes):
@ -316,3 +331,46 @@ class RBDDriver(object):
return {'total': stats['kb'] * units.Ki,
'free': stats['kb_avail'] * units.Ki,
'used': stats['kb_used'] * units.Ki}
def create_snap(self, volume, name):
    """Create a snapshot of an RBD image.

    The snapshot call on the opened volume is dispatched through
    ``tpool.execute``.

    :param volume: name of the RBD image
    :param name: name of the snapshot to create
    """
    log_args = {'snap': name, 'img': volume}
    LOG.debug('creating snapshot(%(snap)s) on rbd image(%(img)s)', log_args)
    with RBDVolumeProxy(self, volume) as rbd_image:
        tpool.execute(rbd_image.create_snap, name)
def remove_snap(self, volume, name, ignore_errors=False):
    """Remove a snapshot from an RBD image, if the image has it.

    When the snapshot is not listed on the image nothing is removed; a
    warning is logged unless ``ignore_errors`` is set.

    :param volume: name of the RBD image
    :param name: name of the snapshot to remove
    :param ignore_errors: suppress the warning for a missing snapshot
    """
    with RBDVolumeProxy(self, volume) as rbd_image:
        existing = [snap.get('name', '') for snap in rbd_image.list_snaps()]
        if name not in existing:
            if not ignore_errors:
                LOG.warning(
                    _LW('no snapshot(%(snap)s) found on image(%(img)s)'),
                    {'snap': name, 'img': volume})
            return

        LOG.debug('removing snapshot(%(snap)s) on rbd image(%(img)s)',
                  {'snap': name, 'img': volume})
        tpool.execute(rbd_image.remove_snap, name)
def rollback_to_snap(self, volume, name):
    """Roll an RBD image back to the contents of one of its snapshots.

    :param volume: name of the RBD image
    :param name: name of the snapshot to roll back to
    :raises exception.SnapshotNotFound: if the image lists no snapshot
        with that name
    """
    with RBDVolumeProxy(self, volume) as rbd_image:
        existing = [snap.get('name', '') for snap in rbd_image.list_snaps()]
        if name not in existing:
            raise exception.SnapshotNotFound(snapshot_id=name)

        LOG.debug('rolling back rbd image(%(img)s) to '
                  'snapshot(%(snap)s)', {'snap': name, 'img': volume})
        tpool.execute(rbd_image.rollback_to_snap, name)

View File

@ -48,6 +48,8 @@ CONF.register_opts(libvirt_opts, 'libvirt')
CONF.import_opt('instances_path', 'nova.compute.manager')
LOG = logging.getLogger(__name__)
RESIZE_SNAPSHOT_NAME = 'nova-resize'
def execute(*args, **kwargs):
    """Forward all arguments to ``utils.execute`` and return its result."""
    return utils.execute(*args, **kwargs)