Merge "backup init_host cleanup exception handling"
This commit is contained in:
commit
0f6d84dcea
|
@ -199,113 +199,147 @@ class BackupManager(manager.SchedulerDependentManager):
|
|||
for mgr in self.volume_managers.values():
|
||||
self._init_volume_driver(ctxt, mgr.driver)
|
||||
|
||||
try:
|
||||
self._cleanup_incomplete_backup_operations(ctxt)
|
||||
except Exception:
|
||||
# Don't block startup of the backup service.
|
||||
LOG.exception(_LE("Problem cleaning incomplete backup "
|
||||
"operations."))
|
||||
|
||||
def _cleanup_incomplete_backup_operations(self, ctxt):
|
||||
LOG.info(_LI("Cleaning up incomplete backup operations."))
|
||||
volumes = self.db.volume_get_all_by_host(ctxt, self.host)
|
||||
|
||||
for volume in volumes:
|
||||
volume_host = volume_utils.extract_host(volume['host'], 'backend')
|
||||
backend = self._get_volume_backend(host=volume_host)
|
||||
mgr = self._get_manager(backend)
|
||||
if volume['status'] == 'backing-up':
|
||||
self._detach_all_attachments(ctxt, mgr, volume)
|
||||
LOG.info(_LI('Resetting volume %(vol_id)s to previous '
|
||||
'status %(status)s (was backing-up).'),
|
||||
{'vol_id': volume['id'],
|
||||
'status': volume['previous_status']})
|
||||
self.db.volume_update(ctxt, volume['id'],
|
||||
{'status': volume['previous_status']})
|
||||
elif volume['status'] == 'restoring-backup':
|
||||
self._detach_all_attachments(ctxt, mgr, volume)
|
||||
LOG.info(_LI('setting volume %s to error_restoring '
|
||||
'(was restoring-backup).'), volume['id'])
|
||||
self.db.volume_update(ctxt, volume['id'],
|
||||
{'status': 'error_restoring'})
|
||||
try:
|
||||
self._cleanup_one_volume(ctxt, volume)
|
||||
except Exception:
|
||||
LOG.exception(_LE("Problem cleaning up volume %(vol)s."),
|
||||
{'vol': volume['id']})
|
||||
|
||||
# TODO(smulcahy) implement full resume of backup and restore
|
||||
# operations on restart (rather than simply resetting)
|
||||
backups = objects.BackupList.get_all_by_host(ctxt, self.host)
|
||||
for backup in backups:
|
||||
if backup['status'] == 'creating':
|
||||
LOG.info(_LI('Resetting backup %s to error (was creating).'),
|
||||
backup['id'])
|
||||
err = 'incomplete backup reset on manager restart'
|
||||
self._update_backup_error(backup, ctxt, err)
|
||||
if backup['status'] == 'restoring':
|
||||
LOG.info(_LI('Resetting backup %s to '
|
||||
'available (was restoring).'),
|
||||
backup['id'])
|
||||
backup.status = 'available'
|
||||
backup.save()
|
||||
if backup['status'] == 'deleting':
|
||||
LOG.info(_LI('Resuming delete on backup: %s.'), backup['id'])
|
||||
if CONF.backup_service_inithost_offload:
|
||||
# Offload all the pending backup delete operations to the
|
||||
# threadpool to prevent the main backup service thread
|
||||
# from being blocked.
|
||||
self._add_to_threadpool(self.delete_backup, ctxt, backup)
|
||||
else:
|
||||
# By default, delete backups sequentially
|
||||
self.delete_backup(ctxt, backup)
|
||||
try:
|
||||
self._cleanup_one_backup(ctxt, backup)
|
||||
except Exception:
|
||||
LOG.exception(_LE("Problem cleaning up backup %(bkup)s."),
|
||||
{'bkup': backup['id']})
|
||||
try:
|
||||
self._cleanup_temp_volumes_snapshots_for_one_backup(ctxt,
|
||||
backup)
|
||||
except Exception:
|
||||
LOG.exception(_LE("Problem cleaning temp volumes and "
|
||||
"snapshots for backup %(bkup)s."),
|
||||
{'bkup': backup['id']})
|
||||
|
||||
self._cleanup_temp_volumes_snapshots(backups)
|
||||
def _cleanup_one_volume(self, ctxt, volume):
|
||||
volume_host = volume_utils.extract_host(volume['host'], 'backend')
|
||||
backend = self._get_volume_backend(host=volume_host)
|
||||
mgr = self._get_manager(backend)
|
||||
if volume['status'] == 'backing-up':
|
||||
self._detach_all_attachments(ctxt, mgr, volume)
|
||||
LOG.info(_LI('Resetting volume %(vol_id)s to previous '
|
||||
'status %(status)s (was backing-up).'),
|
||||
{'vol_id': volume['id'],
|
||||
'status': volume['previous_status']})
|
||||
self.db.volume_update(ctxt, volume['id'],
|
||||
{'status': volume['previous_status']})
|
||||
elif volume['status'] == 'restoring-backup':
|
||||
self._detach_all_attachments(ctxt, mgr, volume)
|
||||
LOG.info(_LI('setting volume %s to error_restoring '
|
||||
'(was restoring-backup).'), volume['id'])
|
||||
self.db.volume_update(ctxt, volume['id'],
|
||||
{'status': 'error_restoring'})
|
||||
|
||||
def _cleanup_one_backup(self, ctxt, backup):
|
||||
if backup['status'] == 'creating':
|
||||
LOG.info(_LI('Resetting backup %s to error (was creating).'),
|
||||
backup['id'])
|
||||
err = 'incomplete backup reset on manager restart'
|
||||
self._update_backup_error(backup, ctxt, err)
|
||||
if backup['status'] == 'restoring':
|
||||
LOG.info(_LI('Resetting backup %s to '
|
||||
'available (was restoring).'),
|
||||
backup['id'])
|
||||
backup.status = 'available'
|
||||
backup.save()
|
||||
if backup['status'] == 'deleting':
|
||||
LOG.info(_LI('Resuming delete on backup: %s.'), backup['id'])
|
||||
if CONF.backup_service_inithost_offload:
|
||||
# Offload all the pending backup delete operations to the
|
||||
# threadpool to prevent the main backup service thread
|
||||
# from being blocked.
|
||||
self._add_to_threadpool(self.delete_backup, ctxt, backup)
|
||||
else:
|
||||
# By default, delete backups sequentially
|
||||
self.delete_backup(ctxt, backup)
|
||||
|
||||
def _detach_all_attachments(self, ctxt, mgr, volume):
|
||||
attachments = volume['volume_attachment'] or []
|
||||
for attachment in attachments:
|
||||
if (attachment['attached_host'] == self.host and
|
||||
attachment['instance_uuid'] is None):
|
||||
mgr.detach_volume(ctxt, volume['id'],
|
||||
attachment['id'])
|
||||
try:
|
||||
mgr.detach_volume(ctxt, volume['id'],
|
||||
attachment['id'])
|
||||
except Exception:
|
||||
LOG.exception(_LE("Detach attachment %(attach_id)s"
|
||||
" failed."),
|
||||
{'attach_id': attachment['id']},
|
||||
resource=volume)
|
||||
|
||||
def _cleanup_temp_volumes_snapshots(self, backups):
|
||||
def _cleanup_temp_volumes_snapshots_for_one_backup(self, ctxt, backup):
|
||||
# NOTE(xyang): If the service crashes or gets restarted during the
|
||||
# backup operation, there could be temporary volumes or snapshots
|
||||
# that are not deleted. Make sure any temporary volumes or snapshots
|
||||
# create by the backup job are deleted when service is started.
|
||||
ctxt = context.get_admin_context()
|
||||
for backup in backups:
|
||||
try:
|
||||
volume = self.db.volume_get(ctxt, backup.volume_id)
|
||||
volume_host = volume_utils.extract_host(volume['host'],
|
||||
'backend')
|
||||
backend = self._get_volume_backend(host=volume_host)
|
||||
mgr = self._get_manager(backend)
|
||||
except (KeyError, exception.VolumeNotFound):
|
||||
LOG.debug("Could not find a volume to clean up for "
|
||||
"backup %s.", backup.id)
|
||||
return
|
||||
|
||||
if backup.temp_volume_id and backup.status == 'error':
|
||||
try:
|
||||
volume = self.db.volume_get(ctxt, backup.volume_id)
|
||||
volume_host = volume_utils.extract_host(volume['host'],
|
||||
'backend')
|
||||
backend = self._get_volume_backend(host=volume_host)
|
||||
mgr = self._get_manager(backend)
|
||||
except (KeyError, exception.VolumeNotFound):
|
||||
LOG.debug("Could not find a volume to clean up for "
|
||||
"backup %s.", backup.id)
|
||||
continue
|
||||
if backup.temp_volume_id and backup.status == 'error':
|
||||
try:
|
||||
temp_volume = self.db.volume_get(ctxt,
|
||||
backup.temp_volume_id)
|
||||
# The temp volume should be deleted directly thru the
|
||||
# the volume driver, not thru the volume manager.
|
||||
mgr.driver.delete_volume(temp_volume)
|
||||
self.db.volume_destroy(ctxt, temp_volume['id'])
|
||||
except exception.VolumeNotFound:
|
||||
LOG.debug("Could not find temp volume %(vol)s to clean up "
|
||||
"for backup %(backup)s.",
|
||||
{'vol': backup.temp_volume_id,
|
||||
'backup': backup.id})
|
||||
backup.temp_volume_id = None
|
||||
backup.save()
|
||||
if backup.temp_snapshot_id and backup.status == 'error':
|
||||
try:
|
||||
temp_snapshot = objects.Snapshot.get_by_id(
|
||||
ctxt, backup.temp_snapshot_id)
|
||||
# The temp snapshot should be deleted directly thru the
|
||||
# volume driver, not thru the volume manager.
|
||||
mgr.driver.delete_snapshot(temp_snapshot)
|
||||
with temp_snapshot.obj_as_admin():
|
||||
self.db.volume_glance_metadata_delete_by_snapshot(
|
||||
ctxt, temp_snapshot.id)
|
||||
temp_snapshot.destroy()
|
||||
except exception.SnapshotNotFound:
|
||||
LOG.debug("Could not find temp snapshot %(snap)s to clean "
|
||||
"up for backup %(backup)s.",
|
||||
{'snap': backup.temp_snapshot_id,
|
||||
'backup': backup.id})
|
||||
backup.temp_snapshot_id = None
|
||||
backup.save()
|
||||
temp_volume = self.db.volume_get(ctxt,
|
||||
backup.temp_volume_id)
|
||||
# The temp volume should be deleted directly thru the
|
||||
# the volume driver, not thru the volume manager.
|
||||
mgr.driver.delete_volume(temp_volume)
|
||||
self.db.volume_destroy(ctxt, temp_volume['id'])
|
||||
except exception.VolumeNotFound:
|
||||
LOG.debug("Could not find temp volume %(vol)s to clean up "
|
||||
"for backup %(backup)s.",
|
||||
{'vol': backup.temp_volume_id,
|
||||
'backup': backup.id})
|
||||
backup.temp_volume_id = None
|
||||
backup.save()
|
||||
|
||||
if backup.temp_snapshot_id and backup.status == 'error':
|
||||
try:
|
||||
temp_snapshot = objects.Snapshot.get_by_id(
|
||||
ctxt, backup.temp_snapshot_id)
|
||||
# The temp snapshot should be deleted directly thru the
|
||||
# volume driver, not thru the volume manager.
|
||||
mgr.driver.delete_snapshot(temp_snapshot)
|
||||
with temp_snapshot.obj_as_admin():
|
||||
self.db.volume_glance_metadata_delete_by_snapshot(
|
||||
ctxt, temp_snapshot.id)
|
||||
temp_snapshot.destroy()
|
||||
except exception.SnapshotNotFound:
|
||||
LOG.debug("Could not find temp snapshot %(snap)s to clean "
|
||||
"up for backup %(backup)s.",
|
||||
{'snap': backup.temp_snapshot_id,
|
||||
'backup': backup.id})
|
||||
backup.temp_snapshot_id = None
|
||||
backup.save()
|
||||
|
||||
def create_backup(self, context, backup):
|
||||
"""Create volume backups using configured backup service."""
|
||||
|
|
|
@ -222,6 +222,7 @@ class BackupTestCase(BaseBackupTest):
|
|||
temp_snapshot_id=temp_snap.id)
|
||||
|
||||
self.backup_mgr.init_host()
|
||||
|
||||
vol1 = db.volume_get(self.ctxt, vol1_id)
|
||||
self.assertEqual('available', vol1['status'])
|
||||
vol2 = db.volume_get(self.ctxt, vol2_id)
|
||||
|
@ -269,55 +270,201 @@ class BackupTestCase(BaseBackupTest):
|
|||
mock_add_threadpool.assert_has_calls(calls, any_order=True)
|
||||
self.assertEqual(2, mock_add_threadpool.call_count)
|
||||
|
||||
@mock.patch.object(db, 'volume_get')
|
||||
def test_init_host_handles_exception(self):
|
||||
"""Test that exception in cleanup is handled."""
|
||||
|
||||
self.mock_object(self.backup_mgr, '_init_volume_driver')
|
||||
mock_cleanup = self.mock_object(
|
||||
self.backup_mgr,
|
||||
'_cleanup_incomplete_backup_operations')
|
||||
mock_cleanup.side_effect = [Exception]
|
||||
|
||||
self.assertIsNone(self.backup_mgr.init_host())
|
||||
|
||||
def test_cleanup_incomplete_backup_operations_with_exceptions(self):
|
||||
"""Test cleanup resilience in the face of exceptions."""
|
||||
|
||||
fake_volume_list = [{'id': 'vol1'}, {'id': 'vol2'}]
|
||||
mock_volume_get_by_host = self.mock_object(
|
||||
db, 'volume_get_all_by_host')
|
||||
mock_volume_get_by_host.return_value = fake_volume_list
|
||||
|
||||
mock_volume_cleanup = self.mock_object(
|
||||
self.backup_mgr, '_cleanup_one_volume')
|
||||
mock_volume_cleanup.side_effect = [Exception]
|
||||
|
||||
fake_backup_list = [{'id': 'bkup1'}, {'id': 'bkup2'}, {'id': 'bkup3'}]
|
||||
mock_backup_get_by_host = self.mock_object(
|
||||
objects.BackupList, 'get_all_by_host')
|
||||
mock_backup_get_by_host.return_value = fake_backup_list
|
||||
|
||||
mock_backup_cleanup = self.mock_object(
|
||||
self.backup_mgr, '_cleanup_one_backup')
|
||||
mock_backup_cleanup.side_effect = [Exception]
|
||||
|
||||
mock_temp_cleanup = self.mock_object(
|
||||
self.backup_mgr, '_cleanup_temp_volumes_snapshots_for_one_backup')
|
||||
mock_temp_cleanup.side_effect = [Exception]
|
||||
|
||||
self.assertIsNone(
|
||||
self.backup_mgr._cleanup_incomplete_backup_operations(
|
||||
self.ctxt))
|
||||
|
||||
self.assertEqual(len(fake_volume_list), mock_volume_cleanup.call_count)
|
||||
self.assertEqual(len(fake_backup_list), mock_backup_cleanup.call_count)
|
||||
self.assertEqual(len(fake_backup_list), mock_temp_cleanup.call_count)
|
||||
|
||||
def test_cleanup_one_backing_up_volume(self):
|
||||
"""Test cleanup_one_volume for volume status 'backing-up'."""
|
||||
|
||||
mock_get_manager = self.mock_object(
|
||||
self.backup_mgr, '_get_manager')
|
||||
mock_get_manager.return_value = 'fake_manager'
|
||||
|
||||
volume_id = self._create_volume_db_entry(status='backing-up',
|
||||
previous_status='available')
|
||||
volume = db.volume_get(self.ctxt, volume_id)
|
||||
|
||||
self.backup_mgr._cleanup_one_volume(self.ctxt, volume)
|
||||
|
||||
volume = db.volume_get(self.ctxt, volume_id)
|
||||
self.assertEqual('available', volume['status'])
|
||||
|
||||
def test_cleanup_one_restoring_backup_volume(self):
|
||||
"""Test cleanup_one_volume for volume status 'restoring-backup'."""
|
||||
|
||||
mock_get_manager = self.mock_object(
|
||||
self.backup_mgr, '_get_manager')
|
||||
mock_get_manager.return_value = 'fake_manager'
|
||||
|
||||
volume_id = self._create_volume_db_entry(status='restoring-backup')
|
||||
volume = db.volume_get(self.ctxt, volume_id)
|
||||
|
||||
self.backup_mgr._cleanup_one_volume(self.ctxt, volume)
|
||||
|
||||
volume = db.volume_get(self.ctxt, volume_id)
|
||||
self.assertEqual('error_restoring', volume['status'])
|
||||
|
||||
def test_cleanup_one_creating_backup(self):
|
||||
"""Test cleanup_one_backup for volume status 'creating'."""
|
||||
|
||||
backup = self._create_backup_db_entry(status='creating')
|
||||
|
||||
self.backup_mgr._cleanup_one_backup(self.ctxt, backup)
|
||||
|
||||
self.assertEqual('error', backup.status)
|
||||
|
||||
def test_cleanup_one_restoring_backup(self):
|
||||
"""Test cleanup_one_backup for volume status 'restoring'."""
|
||||
|
||||
backup = self._create_backup_db_entry(status='restoring')
|
||||
|
||||
self.backup_mgr._cleanup_one_backup(self.ctxt, backup)
|
||||
|
||||
self.assertEqual('available', backup.status)
|
||||
|
||||
def test_cleanup_one_deleting_backup(self):
|
||||
"""Test cleanup_one_backup for volume status 'deleting'."""
|
||||
|
||||
backup = self._create_backup_db_entry(status='deleting')
|
||||
|
||||
self.backup_mgr._cleanup_one_backup(self.ctxt, backup)
|
||||
|
||||
self.assertRaises(exception.BackupNotFound,
|
||||
db.backup_get,
|
||||
self.ctxt,
|
||||
backup.id)
|
||||
|
||||
def test_detach_all_attachments_handles_exceptions(self):
|
||||
"""Test detach_all_attachments with exceptions."""
|
||||
|
||||
mock_log = self.mock_object(manager, 'LOG')
|
||||
mock_volume_mgr = mock.Mock()
|
||||
mock_detach_volume = mock_volume_mgr.detach_volume
|
||||
mock_detach_volume.side_effect = [Exception]
|
||||
|
||||
fake_attachments = [
|
||||
{
|
||||
'id': 'attachment1',
|
||||
'attached_host': 'testhost',
|
||||
'instance_uuid': None,
|
||||
},
|
||||
{
|
||||
'id': 'attachment2',
|
||||
'attached_host': 'testhost',
|
||||
'instance_uuid': None,
|
||||
}
|
||||
]
|
||||
fake_volume = {
|
||||
'id': 'fake_volume_id',
|
||||
'volume_attachment': fake_attachments
|
||||
}
|
||||
|
||||
self.backup_mgr._detach_all_attachments(self.ctxt,
|
||||
mock_volume_mgr,
|
||||
fake_volume)
|
||||
|
||||
self.assertEqual(len(fake_attachments), mock_log.exception.call_count)
|
||||
|
||||
@ddt.data(KeyError, exception.VolumeNotFound)
|
||||
def test_cleanup_temp_volumes_snapshots_volume_not_found(
|
||||
self, err, mock_volume_get):
|
||||
def test_cleanup_temp_volumes_snapshots_for_one_backup_volume_not_found(
|
||||
self, err):
|
||||
"""Ensure we handle missing volume for a backup."""
|
||||
|
||||
mock_volume_get = self.mock_object(db, 'volume_get')
|
||||
mock_volume_get.side_effect = [err]
|
||||
|
||||
backup1 = self._create_backup_db_entry(status='creating')
|
||||
backups = [backup1]
|
||||
backup = self._create_backup_db_entry(status='creating')
|
||||
|
||||
self.assertIsNone(self.backup_mgr._cleanup_temp_volumes_snapshots(
|
||||
backups))
|
||||
self.assertIsNone(
|
||||
self.backup_mgr._cleanup_temp_volumes_snapshots_for_one_backup(
|
||||
self.ctxt,
|
||||
backup))
|
||||
|
||||
@mock.patch.object(lvm.LVMVolumeDriver, 'delete_snapshot')
|
||||
def test_cleanup_temp_snapshot_not_found(self,
|
||||
mock_delete_snapshot):
|
||||
def test_cleanup_temp_snapshot_for_one_backup_not_found(self):
|
||||
"""Ensure we handle missing temp snapshot for a backup."""
|
||||
mock_delete_snapshot = self.mock_object(
|
||||
lvm.LVMVolumeDriver, 'delete_snapshot')
|
||||
|
||||
vol1_id = self._create_volume_db_entry()
|
||||
self._create_volume_attach(vol1_id)
|
||||
db.volume_update(self.ctxt, vol1_id, {'status': 'backing-up'})
|
||||
backup1 = self._create_backup_db_entry(status='error',
|
||||
volume_id=vol1_id,
|
||||
temp_snapshot_id='fake')
|
||||
backups = [backup1]
|
||||
self.assertEqual('fake', backups[0].temp_snapshot_id)
|
||||
self.assertIsNone(self.backup_mgr._cleanup_temp_volumes_snapshots(
|
||||
backups))
|
||||
backup = self._create_backup_db_entry(status='error',
|
||||
volume_id=vol1_id,
|
||||
temp_snapshot_id='fake')
|
||||
|
||||
self.assertIsNone(
|
||||
self.backup_mgr._cleanup_temp_volumes_snapshots_for_one_backup(
|
||||
self.ctxt,
|
||||
backup))
|
||||
|
||||
self.assertFalse(mock_delete_snapshot.called)
|
||||
self.assertIsNone(backups[0].temp_snapshot_id)
|
||||
backup1.destroy()
|
||||
self.assertIsNone(backup.temp_snapshot_id)
|
||||
|
||||
backup.destroy()
|
||||
db.volume_destroy(self.ctxt, vol1_id)
|
||||
|
||||
@mock.patch.object(lvm.LVMVolumeDriver, 'delete_volume')
|
||||
def test_cleanup_temp_volume_not_found(self,
|
||||
mock_delete_volume):
|
||||
def test_cleanup_temp_volume_for_one_backup_not_found(self):
|
||||
"""Ensure we handle missing temp volume for a backup."""
|
||||
mock_delete_volume = self.mock_object(
|
||||
lvm.LVMVolumeDriver, 'delete_volume')
|
||||
|
||||
vol1_id = self._create_volume_db_entry()
|
||||
self._create_volume_attach(vol1_id)
|
||||
db.volume_update(self.ctxt, vol1_id, {'status': 'backing-up'})
|
||||
backup1 = self._create_backup_db_entry(status='error',
|
||||
volume_id=vol1_id,
|
||||
temp_volume_id='fake')
|
||||
backups = [backup1]
|
||||
self.assertEqual('fake', backups[0].temp_volume_id)
|
||||
self.assertIsNone(self.backup_mgr._cleanup_temp_volumes_snapshots(
|
||||
backups))
|
||||
backup = self._create_backup_db_entry(status='error',
|
||||
volume_id=vol1_id,
|
||||
temp_volume_id='fake')
|
||||
|
||||
self.assertIsNone(
|
||||
self.backup_mgr._cleanup_temp_volumes_snapshots_for_one_backup(
|
||||
self.ctxt,
|
||||
backup))
|
||||
|
||||
self.assertFalse(mock_delete_volume.called)
|
||||
self.assertIsNone(backups[0].temp_volume_id)
|
||||
backup1.destroy()
|
||||
self.assertIsNone(backup.temp_volume_id)
|
||||
|
||||
backup.destroy()
|
||||
db.volume_destroy(self.ctxt, vol1_id)
|
||||
|
||||
def test_create_backup_with_bad_volume_status(self):
|
||||
|
|
Loading…
Reference in New Issue