Fix live-migration failure in FC multipath case

Currently, Compute Nodes access multipath FC volumes through
/dev/dm-<NUM> instead of /dev/mapper/<multipath_id>. In addition, the
multipath_id in connection_info is not maintained properly and may be
lost when the connection is refreshed.

This behavior causes the source Compute Node and the destination
Compute Node to fail to disconnect from/connect to volumes properly,
which results in live-migration failure.

To fix it, /dev/mapper/<multipath_id> is now used instead of
/dev/dm-<NUM> to access multipath devices, just like the iSCSI multipath
implementation, and logic is added to preserve the multipath_id, which
is unique across Compute Nodes.

Change-Id: I17f15852c098af88afd270084c62eb87693c60d4
Closes-Bug: #1327497
This commit is contained in:
Jeegn Chen 2014-06-08 16:23:36 +08:00 committed by Jeegn Chen
parent 5c4695741b
commit 3ea14e8a70
7 changed files with 47 additions and 17 deletions

View File

@ -4783,7 +4783,7 @@ class ComputeManager(manager.Manager):
# Cleanup source host post live-migration
block_device_info = self._get_instance_block_device_info(
ctxt, instance, bdms)
ctxt, instance, bdms=bdms)
self.driver.post_live_migration(ctxt, instance, block_device_info,
migrate_data)

View File

@ -14,7 +14,7 @@
"""Generic linux scsi subsystem utilities."""
from nova.i18n import _
from nova.i18n import _LW
from nova.openstack.common import log as logging
from nova.openstack.common import loopingcall
from nova.openstack.common import processutils
@ -97,7 +97,7 @@ def find_multipath_device(device):
(out, err) = utils.execute('multipath', '-l', device,
run_as_root=True)
except processutils.ProcessExecutionError as exc:
LOG.warn(_("Multipath call failed exit (%(code)s)")
LOG.warn(_LW("Multipath call failed exit (%(code)s)")
% {'code': exc.exit_code})
return None
@ -110,15 +110,15 @@ def find_multipath_device(device):
# device line output is different depending
# on /etc/multipath.conf settings.
if info[1][:2] == "dm":
mdev = "/dev/%s" % info[1]
mdev_id = info[0]
mdev = '/dev/mapper/%s' % mdev_id
elif info[2][:2] == "dm":
mdev = "/dev/%s" % info[2]
mdev_id = info[1].replace('(', '')
mdev_id = mdev_id.replace(')', '')
mdev = '/dev/mapper/%s' % mdev_id
if mdev is None:
LOG.warn(_("Couldn't find multipath device %s"), line)
LOG.warn(_LW("Couldn't find multipath device %s"), line)
return None
LOG.debug("Found multipath device = %s", mdev)
@ -126,6 +126,11 @@ def find_multipath_device(device):
for dev_line in device_lines:
if dev_line.find("policy") != -1:
continue
if '#' in dev_line:
LOG.warn(_LW('Skip faulty line "%(dev_line)s" of'
' multipath device %(mdev)s')
% {'mdev': mdev, 'dev_line': dev_line})
continue
dev_line = dev_line.lstrip(' |-`')
dev_info = dev_line.split()

View File

@ -59,7 +59,7 @@ class StorageLinuxSCSITestCase(test.NoDBTestCase):
info = linuxscsi.find_multipath_device('/dev/sde')
LOG.error("info = %s" % info)
self.assertEqual("/dev/dm-3", info["device"])
self.assertEqual("/dev/mapper/350002ac20398383d", info["device"])
self.assertEqual("/dev/sde", info['devices'][0]['device'])
self.assertEqual("0", info['devices'][0]['host'])
self.assertEqual("0", info['devices'][0]['id'])
@ -90,7 +90,8 @@ class StorageLinuxSCSITestCase(test.NoDBTestCase):
info = linuxscsi.find_multipath_device('/dev/sde')
LOG.error("info = %s" % info)
self.assertEqual("/dev/dm-2", info["device"])
self.assertEqual("/dev/mapper/36005076da00638089c000000000004d5",
info["device"])
self.assertEqual("/dev/sde", info['devices'][0]['device'])
self.assertEqual("6", info['devices'][0]['host'])
self.assertEqual("0", info['devices'][0]['channel'])
@ -118,7 +119,8 @@ class StorageLinuxSCSITestCase(test.NoDBTestCase):
info = linuxscsi.find_multipath_device('/dev/sdd')
LOG.error("info = %s" % info)
self.assertEqual("/dev/dm-2", info["device"])
self.assertEqual("/dev/mapper/36005076303ffc48e0000000000000101",
info["device"])
self.assertEqual("/dev/sdd", info['devices'][0]['device'])
self.assertEqual("6", info['devices'][0]['host'])
self.assertEqual("0", info['devices'][0]['channel'])

View File

@ -951,14 +951,22 @@ class LibvirtVolumeTestCase(test.NoDBTestCase):
mount_device = "vde"
conf = libvirt_driver.connect_volume(connection_info,
self.disk_info)
self.assertEqual('1234567890',
connection_info['data']['multipath_id'])
tree = conf.format_dom()
self.assertEqual(tree.get('type'), 'block')
self.assertEqual(tree.find('./source').get('dev'),
multipath_devname)
connection_info["data"]["devices"] = devices["devices"]
self.assertEqual('block', tree.get('type'))
self.assertEqual(multipath_devname,
tree.find('./source').get('dev'))
# Test the scenario where multipath_id is returned
libvirt_driver.disconnect_volume(connection_info, mount_device)
expected_commands = []
self.assertEqual(self.executes, expected_commands)
self.assertEqual(expected_commands, self.executes)
# Test the scenario where multipath_id is not returned
connection_info["data"]["devices"] = devices["devices"]
del connection_info["data"]["multipath_id"]
libvirt_driver.disconnect_volume(connection_info, mount_device)
expected_commands = []
self.assertEqual(expected_commands, self.executes)
# Should not work for anything other than string, unicode, and list
connection_info = self.fibrechan_connection(self.vol,

View File

@ -436,8 +436,8 @@ class TestDriverBlockDevice(test.NoDBTestCase):
instance = {'id': 'fake_id', 'uuid': 'fake_uuid'}
connector = {'ip': 'fake_ip', 'host': 'fake_host'}
connection_info = {'data': {}}
expected_conn_info = {'data': {},
connection_info = {'data': {'multipath_id': 'fake_multipath_id'}}
expected_conn_info = {'data': {'multipath_id': 'fake_multipath_id'},
'serial': 'fake-volume-id-2'}
self.mox.StubOutWithMock(test_bdm._bdm_obj, 'save')

View File

@ -17,6 +17,7 @@ import operator
from nova import block_device
from nova.i18n import _
from nova.i18n import _LI
from nova import objects
from nova.objects import base as obj_base
from nova.openstack.common import excutils
@ -208,6 +209,14 @@ class DriverVolumeBlockDevice(DriverBlockDevice):
except TypeError:
self['connection_info'] = None
# Copy the multipath_id stored in this block device's current
# connection_info into a newly obtained connection_info dict, so the
# value is not dropped when the stored connection info is replaced.
#
# connection_info: the new connection info dict; its 'data' entry is
#     updated in place (assumes a 'data' key is present -- TODO confirm
#     against callers).
# Returns nothing; the preserved id is logged at info level.
def _preserve_multipath_id(self, connection_info):
# Only act when a previous connection_info exists and has a 'data' entry.
if self['connection_info'] and 'data' in self['connection_info']:
# Nothing to carry over unless the old data recorded a multipath_id.
if 'multipath_id' in self['connection_info']['data']:
connection_info['data']['multipath_id'] =\
self['connection_info']['data']['multipath_id']
LOG.info(_LI('preserve multipath_id %s'),
connection_info['data']['multipath_id'])
@update_db
def attach(self, context, instance, volume_api, virt_driver,
do_check_attach=True, do_driver_attach=False):
@ -224,6 +233,7 @@ class DriverVolumeBlockDevice(DriverBlockDevice):
connector)
if 'serial' not in connection_info:
connection_info['serial'] = self.volume_id
self._preserve_multipath_id(connection_info)
# If do_driver_attach is False, we will attach a volume to an instance
# at boot time. So actual attach is done by instance creation code.
@ -267,6 +277,7 @@ class DriverVolumeBlockDevice(DriverBlockDevice):
connector)
if 'serial' not in connection_info:
connection_info['serial'] = self.volume_id
self._preserve_multipath_id(connection_info)
self['connection_info'] = connection_info
def save(self, context):

View File

@ -980,7 +980,6 @@ class LibvirtFibreChannelVolumeDriver(LibvirtBaseVolumeDriver):
"""Detach the volume from instance_name."""
super(LibvirtFibreChannelVolumeDriver,
self).disconnect_volume(connection_info, mount_device)
devices = connection_info['data']['devices']
# If this is a multipath device, we need to search again
# and make sure we remove all the devices. Some of them
@ -990,6 +989,11 @@ class LibvirtFibreChannelVolumeDriver(LibvirtBaseVolumeDriver):
mdev_info = linuxscsi.find_multipath_device(multipath_id)
devices = mdev_info['devices']
LOG.debug("devices to remove = %s", devices)
else:
# only needed when multipath-tools work improperly
devices = connection_info['data'].get('devices', [])
LOG.warn(_LW("multipath-tools probably work improperly. "
"devices to remove = %s.") % devices)
# There may have been more than 1 device mounted
# by the kernel for this volume. We have to remove