Refactor iSCSI connect

This patch refactors iSCSI connect code changing the approach to one
that relies primarily on sysfs, instead of CLI tools, to retrieve all
the required information: devices from the connection, multipath system
device name, multipath name, the WWN for the block devices...

By doing so, not only do we fix a good number of bugs, but we also
improve the reliability and speed of the mechanism.

Good examples of the improvements and benefits achieved by this patch are:

- Clean all leftovers on exceptions on a connection.

- Parallelize logins on multipath to increase speed on flaky network
  connections.

- As long as there is at least one good path when working with multipath
  we will return a multipath device instead of a single path device,
  which helps with temporary flaky connections.

- Don't use the rescan retry parameter as log in retry on multipath
  connections so both single and multipath cases are consistent.

- We no longer rely on just one device to get the wwn and look for the
  multipath.  This would be problematic with flaky connections.

- No more querying iSCSI devices for their WWN (page 0x83), removing
  delays and issues on flaky connections.

- The mechanism no longer has a problem when a device exists but is not
  accessible.

- We use links in `/dev/disk/by-id` to get the WWID on connect, so we
  make sure there are no leftovers on disconnect, but we no longer use
  symlinks from `/dev/disk/by-path`, `/dev/disk/by-id`, or `/dev/mapper`
  to find devices.

- We no longer need to rely on the WWN to determine the multipath, we
  have the session and the LUN, so we trace the devices and from those
  we get if they belong to a multipath.

- Stop polluting the logs with unnecessary exceptions from checking if
  the node or session exist.

- Action retries will now only log the final exception instead of
  logging all the exceptions.

- Warn when a multipath could not be formed and a single device is being
  used, as performance will be degraded.

- We no longer do global rescans on single connections that could be
  bringing in unrelated and unwanted devices (`iscsiadm -T iqn -p portal
  --rescan`).

- Fix scan mechanism that would not request all scans when the same iqn
  was shared between portals, and that could send a scan request to the
  wrong IP in that case.

- When doing single pathed connections we could end up with a multipath
  because we didn't clean up unfruitful connections.

It's worth mentioning that this patch does not touch the extend
operation.

Change-Id: Ia1c47bfaa7bc3544a5feba6a8a30faf2f132b8d7
This commit is contained in:
Gorka Eguileor 2017-04-04 20:23:58 +02:00
parent 192bdfaab6
commit 56c8665d3d
8 changed files with 1266 additions and 748 deletions

View File

@ -163,14 +163,6 @@ class InvalidConnectorProtocol(ValueError):
pass
class HostChannelsTargetsNotFound(BrickException):
message = _('Unable to find host, channel, and target for %(iqns)s.')
def __init__(self, message=None, iqns=None, found=None):
super(HostChannelsTargetsNotFound, self).__init__(message, iqns=iqns)
self.found = found
class ExceptionChainer(BrickException):
"""A Exception that can contain a group of exceptions.

View File

@ -14,18 +14,18 @@
import collections
import copy
import glob
import os
import threading
import time
from oslo_concurrency import lockutils
from oslo_concurrency import processutils as putils
from oslo_log import log as logging
from oslo_utils import excutils
from oslo_utils import strutils
from os_brick import exception
from os_brick.i18n import _
from os_brick import initiator
from os_brick.initiator.connectors import base
from os_brick.initiator.connectors import base_iscsi
@ -94,11 +94,7 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
# didn't exist previously.
# We are simply trying to find any existing volumes with
# already connected sessions.
host_devices, target_props = self._get_potential_volume_paths(
connection_properties,
connect_to_portal=False,
use_rescan=False)
host_devices = self._get_potential_volume_paths(connection_properties)
for path in host_devices:
if os.path.exists(path):
volume_paths.append(path)
@ -163,9 +159,45 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
# entry: [tcp, [1], 192.168.121.250:3260,1 ...]
return [entry[2] for entry in self._get_iscsi_sessions_full()]
def _get_potential_volume_paths(self, connection_properties,
connect_to_portal=True,
use_rescan=True):
def _get_ips_iqns_luns(self, connection_properties):
"""Build a list of ips, iqns, and luns.

Portals are obtained from ``_discover_iscsi_portals``.  When the
connection properties carry no ``target_iqns`` and every discovered
(ip, lun) pair is also reported under ``target_iqn``, the result is
rebuilt so that all entries use ``target_iqn``.

:param connection_properties: The dictionary that describes all
of the target volume attributes.
:type connection_properties: dict
:returns: list of tuples of (ip, iqn, lun)
:raises TargetPortalsNotFound: if discovery fails and
``target_portals`` is present in the connection properties.
:raises TargetPortalNotFound: if discovery fails and ``target_portal``
(but not ``target_portals``) is present in the connection properties.
"""
try:
ips_iqns_luns = self._discover_iscsi_portals(connection_properties)
except Exception:
# Translate any discovery failure into the portal specific exception;
# if neither key is present the original exception is re-raised.
if 'target_portals' in connection_properties:
raise exception.TargetPortalsNotFound(
target_portals=connection_properties['target_portals'])
if 'target_portal' in connection_properties:
raise exception.TargetPortalNotFound(
target_portal=connection_properties['target_portal'])
raise
if not connection_properties.get('target_iqns'):
# There are two types of iSCSI multipath devices. One which
# shares the same iqn between multiple portals, and the other
# which use different iqns on different portals.
# Try to identify the type by checking the iscsiadm output
# if the iqn is used by multiple portals. If it is, it's
# the former, so use the supplied iqn. Otherwise, it's the
# latter, so try the ip,iqn combinations to find the targets
# which constitutes the multipath device.
main_iqn = connection_properties['target_iqn']
all_portals = {(ip, lun) for ip, iqn, lun in ips_iqns_luns}
match_portals = {(ip, lun) for ip, iqn, lun in ips_iqns_luns
if iqn == main_iqn}
# match_portals is a subset of all_portals, so equal sizes mean that
# every (ip, lun) pair was reported with main_iqn.
if len(all_portals) == len(match_portals):
ips_iqns_luns = [(p[0], main_iqn, p[1])
for p in all_portals]
return ips_iqns_luns
def _get_potential_volume_paths(self, connection_properties):
"""Build a list of potential volume paths that exist.
Given a list of target_portals in the connection_properties,
@ -173,10 +205,7 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
This method's job is to build that list of potential paths
for a volume that might show up.
This is used during connect_volume time, in which case we want
to connect to the iSCSI target portal.
During get_volume_paths time, we are looking to
This is only used during get_volume_paths time, we are looking to
find a list of existing volume paths for the connection_properties.
In this case, we don't want to connect to the portal. If we
blindly try and connect to a portal, it could create a new iSCSI
@ -185,96 +214,28 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
:param connection_properties: The dictionary that describes all
of the target volume attributes.
:type connection_properties: dict
:param connect_to_portal: Do we want to try a new connection to the
target portal(s)? Set this to False if you
want to search for existing volumes, not
discover new volumes.
:param connect_to_portal: bool
:param use_rescan: Issue iSCSI rescan during discovery?
:type use_rescan: bool
:returns: dict
:returns: list
"""
target_props = None
connected_to_portal = False
if self.use_multipath:
LOG.info("Multipath discovery for iSCSI enabled")
# Multipath installed, discovering other targets if available
try:
ips_iqns_luns = self._discover_iscsi_portals(
connection_properties)
except Exception:
if 'target_portals' in connection_properties:
raise exception.TargetPortalsNotFound(
target_portals=connection_properties['target_portals'])
elif 'target_portal' in connection_properties:
raise exception.TargetPortalNotFound(
target_portal=connection_properties['target_portal'])
else:
raise
if not connection_properties.get('target_iqns'):
# There are two types of iSCSI multipath devices. One which
# shares the same iqn between multiple portals, and the other
# which use different iqns on different portals.
# Try to identify the type by checking the iscsiadm output
# if the iqn is used by multiple portals. If it is, it's
# the former, so use the supplied iqn. Otherwise, it's the
# latter, so try the ip,iqn combinations to find the targets
# which constitutes the multipath device.
main_iqn = connection_properties['target_iqn']
all_portals = {(ip, lun) for ip, iqn, lun in ips_iqns_luns}
match_portals = {(ip, lun) for ip, iqn, lun in ips_iqns_luns
if iqn == main_iqn}
if len(all_portals) == len(match_portals):
ips_iqns_luns = [(p[0], main_iqn, p[1])
for p in all_portals]
for ip, iqn, lun in ips_iqns_luns:
props = copy.deepcopy(connection_properties)
props['target_portal'] = ip
props['target_iqn'] = iqn
if connect_to_portal:
if self._connect_to_iscsi_portal(props):
connected_to_portal = True
if use_rescan:
self._rescan_iscsi(ips_iqns_luns)
host_devices = self._get_device_path(connection_properties)
else:
LOG.info("Multipath discovery for iSCSI not enabled.")
iscsi_sessions = []
if not connect_to_portal:
iscsi_sessions = self._get_iscsi_sessions()
iscsi_sessions = self._get_iscsi_sessions()
iscsi_portals_with_sessions = [s[2] for s in iscsi_sessions]
host_devices = []
target_props = connection_properties
host_devices = set()
for props in self._iterate_all_targets(connection_properties):
if connect_to_portal:
if self._connect_to_iscsi_portal(props):
target_props = props
connected_to_portal = True
host_devices = self._get_device_path(props)
break
else:
LOG.warning(
'Failed to connect to iSCSI portal %(portal)s.',
{'portal': props['target_portal']})
else:
# If we aren't trying to connect to the portal, we
# want to find ALL possible paths from all of the
# alternate portals
if props['target_portal'] in iscsi_sessions:
paths = self._get_device_path(props)
host_devices = list(set(paths + host_devices))
# If we aren't trying to connect to the portal, we
# want to find ALL possible paths from all of the
# alternate portals
if props['target_portal'] in iscsi_portals_with_sessions:
paths = self._get_device_path(props)
host_devices.update(paths)
host_devices = list(host_devices)
if connect_to_portal and not connected_to_portal:
msg = _("Could not login to any iSCSI portal.")
LOG.error(msg)
raise exception.FailedISCSITargetPortalLogin(message=msg)
return host_devices, target_props
return host_devices
def set_execute(self, execute):
super(ISCSIConnector, self).set_execute(execute)
@ -424,7 +385,6 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
@utils.trace
@synchronized('connect_volume')
@utils.retry(exceptions=(exception.VolumeDeviceNotFound))
def connect_volume(self, connection_properties):
"""Attach the volume to instance_name.
@ -447,66 +407,224 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
target_lun(s) - LUN id of the volume
Note that plural keys may be used when use_multipath=True
"""
device_info = {'type': 'block'}
# At this point the host_devices may be an empty list
host_devices, target_props = self._get_potential_volume_paths(
connection_properties)
# The /dev/disk/by-path/... node is not always present immediately
# TODO(justinsb): This retry-with-delay is a pattern, move to utils?
tries = 0
# Loop until at least 1 path becomes available
while all(not os.path.exists(x) for x in host_devices):
if tries >= self.device_scan_attempts:
raise exception.VolumeDeviceNotFound(device=host_devices)
LOG.info("ISCSI volume not yet found at: %(host_devices)s. "
"Will rescan & retry. Try number: %(tries)s.",
{'host_devices': host_devices, 'tries': tries})
try:
if self.use_multipath:
# We need to refresh the paths as the devices may be empty
host_devices, target_props = (
self._get_potential_volume_paths(connection_properties))
else:
if tries:
host_devices = self._get_device_path(target_props)
self._run_iscsiadm(target_props, ("--rescan",))
return self._connect_multipath_volume(connection_properties)
return self._connect_single_volume(connection_properties)
except Exception:
# NOTE(geguileo): By doing the cleanup here we ensure we only do
# the logins once for multipath if they succeed, but retry if they
# don't, which helps on bad network cases.
with excutils.save_and_reraise_exception():
self._cleanup_connection(connection_properties, force=True)
tries += 1
if all(not os.path.exists(x) for x in host_devices):
time.sleep(tries ** 2)
@utils.retry(exceptions=(exception.VolumeDeviceNotFound))
def _connect_single_volume(self, connection_properties):
"""Connect to a volume using a single path.

Targets from ``_iterate_all_targets`` are tried through
``_connect_vol``.  Once a device has been found its WWN is looked up in
sysfs, polling up to 10 times with a 1 second pause between attempts.

:param connection_properties: The dictionary that describes all
of the target volume attributes.
:returns: dict with the keys ``type``, ``scsi_wwn`` and ``path``.
:raises VolumeDeviceNotFound: if no device with a WWN could be found.
"""
# Shared results dictionary, with the keys documented in _connect_vol.
data = {'stop_connecting': False, 'num_logins': 0, 'failed_logins': 0,
'stopped_threads': 0, 'found_devices': [],
'just_added_devices': []}
for props in self._iterate_all_targets(connection_properties):
self._connect_vol(self.device_scan_attempts, props, data)
found_devs = data['found_devices']
if found_devs:
# The WWN is resolved through /dev/disk/by-id links, which may take a
# moment to show up, hence the polling.
for __ in range(10):
wwn = self._linuxscsi.get_sysfs_wwn(found_devs)
if wwn:
return {'type': 'block', 'scsi_wwn': wwn,
'path': '/dev/' + found_devs[0]}
time.sleep(1)
LOG.debug('Could not find the WWN for %s.', found_devs[0])
# If we failed we must cleanup the connection, as we could be
# leaving the node entry if it's not being used by another device.
ips_iqns_luns = ((props['target_portal'], props['target_iqn'],
props['target_lun']), )
self._cleanup_connection(props, ips_iqns_luns, force=True,
ignore_errors=True)
# Reset connection result values for next try
data.update(num_logins=0, failed_logins=0, found_devices=[])
raise exception.VolumeDeviceNotFound(device='')
def _connect_vol(self, rescans, props, data):
"""Make a connection to a volume, send scans and wait for the device.
This method is specifically designed to support multithreading and
share the results via a shared dictionary with fixed keys, which is
thread safe.
Since the heaviest operations are run via subprocesses we don't worry
too much about the GIL or how the eventlets will handle the context
switching.
The method will only try to log in once, since iscsid's initiator
already tries 8 times by default to do the login, or whatever value we
have as node.session.initial_login_retry_max in our system.
Shared dictionary has the following keys:
- stop_connecting: When the caller wants us to stop the rescans
- num_logins: Count of how many threads have successfully logged in
- failed_logins: Count of how many threads have failed to log in
- stopped_threads: How many threads have finished. This may be
different than num_logins + failed_logins, since
some threads may still be waiting for a device.
- found_devices: List of devices the connections have found
- just_added_devices: Devices that have been found and still have not
been processed by the main thread that manages
all the connecting threads.
:param rescans: Number of rescans to perform before giving up.
:param props: Properties of the connection.
:param data: Shared data.
:returns: None.  Results are reported only through ``data``.
"""
device = hctl = None
portal = props['target_portal']
# A falsy session means the login failed.
session = self._connect_to_iscsi_portal(props)
do_scans = rescans > 0
retry = 1
if session:
# NOTE(review): ``+=`` on a dict value is a read-modify-write; confirm
# it is safe under the threading model in use.
data['num_logins'] += 1
LOG.debug('Connected to %s', portal)
while do_scans:
try:
# The hctl is cached once found, only the device lookup is repeated.
if not hctl:
hctl = self._linuxscsi.get_hctl(session,
props['target_lun'])
# Scan is sent on connect by iscsid, so skip first rescan
if hctl:
if retry > 1:
self._linuxscsi.scan_iscsi(*hctl)
device = self._linuxscsi.device_name_by_hctl(session,
hctl)
if device:
break
except Exception:
# Best effort: a failed scan is logged and the loop keeps retrying.
LOG.exception('Exception scanning %s', portal)
pass
retry += 1
do_scans = (retry <= rescans and
not (device or data['stop_connecting']))
if do_scans:
# Quadratic back-off between attempts: 4s, 9s, 16s, ...
time.sleep(retry ** 2)
if device:
LOG.debug('Connected to %s using %s', device,
strutils.mask_password(props))
else:
LOG.warning('LUN %(lun)s on iSCSI portal %(portal)s not found '
'on sysfs after logging in.',
{'lun': props['target_lun'], 'portal': portal})
else:
LOG.warning('Failed to connect to iSCSI portal %s.', portal)
data['failed_logins'] += 1
if device:
data['found_devices'].append(device)
data['just_added_devices'].append(device)
data['stopped_threads'] += 1
@utils.retry(exceptions=(exception.VolumeDeviceNotFound))
def _connect_multipath_volume(self, connection_properties):
"""Connect to a multipathed volume launching parallel login requests.
We will be doing parallel login requests, which will considerably speed
up the process when we have flaky connections.
We'll always try to return a multipath device even if there's only one
path discovered, that way we can return once we have logged in in all
the portals, because the paths will come up later.
To make this possible we tell multipathd that the wwid is a multipath
as soon as we have one device, and then hint multipathd to reconsider
that volume for a multipath asking to add the path, because even if
it's already known by multipathd it would have been discarded if it
was the first time this volume was seen here.
"""
wwn = mpath = None
wwn_added = last_try_on = False
found = []
just_added_devices = []
# Dict used to communicate with threads as detailed in _connect_vol
data = {'stop_connecting': False, 'num_logins': 0, 'failed_logins': 0,
'stopped_threads': 0, 'found_devices': found,
'just_added_devices': just_added_devices}
ips_iqns_luns = self._get_ips_iqns_luns(connection_properties)
# Launch individual threads for each session with the own properties
retries = self.device_scan_attempts
threads = []
for ip, iqn, lun in ips_iqns_luns:
props = connection_properties.copy()
props.update(target_portal=ip, target_iqn=iqn, target_lun=lun)
threads.append(threading.Thread(target=self._connect_vol,
args=(retries, props, data)))
for thread in threads:
thread.start()
# Continue until:
# - All connection attempts have finished and none has logged in
# - Multipath has been found and connection attempts have either
# finished or have already logged in
# - We have finished in all threads, logged in, found some device, and
# 10 seconds have passed, which should be enough with up to 10%
# network package drops.
while not ((len(ips_iqns_luns) == data['stopped_threads'] and
not found) or
(mpath and len(ips_iqns_luns) == data['num_logins'] +
data['failed_logins'])):
# We have devices but we don't know the wwn yet
if not wwn and found:
wwn = self._linuxscsi.get_sysfs_wwn(found)
# We have the wwn but not a multipath
if wwn and not mpath:
mpath = self._linuxscsi.find_sysfs_multipath_dm(found)
if not (mpath or wwn_added):
# Tell multipathd that this wwn is a multipath and hint
# multipathd to recheck all the devices we have just
# connected. We only do this once, since for any new
# device multipathd will already know it is a multipath.
# This is only useful if we have multipathd configured with
# find_multipaths set to yes, and has no effect if it's set
# to no.
wwn_added = self._linuxscsi.multipath_add_wwid(wwn)
while not mpath and just_added_devices:
device_path = '/dev/' + just_added_devices.pop(0)
self._linuxscsi.multipath_add_path(device_path)
mpath = self._linuxscsi.find_sysfs_multipath_dm(found)
# Give some extra time after all threads have finished.
if (not last_try_on and found and
len(ips_iqns_luns) == data['stopped_threads']):
LOG.debug('All connection threads finished, giving 10 seconds '
'for dm to appear.')
last_try_on = time.time() + 10
elif last_try_on and last_try_on < time.time():
break
time.sleep(1)
data['stop_connecting'] = True
for thread in threads:
thread.join()
if tries != 0:
LOG.debug("Found iSCSI node %(host_devices)s "
"(after %(tries)s rescans)",
{'host_devices': host_devices, 'tries': tries})
# If we haven't found any devices let the caller do the cleanup
if not found:
raise exception.VolumeDeviceNotFound(device='')
# Choose an accessible host device
host_device = next(dev for dev in host_devices if os.path.exists(dev))
# NOTE(geguileo): If we cannot find the dm it's because all paths are
# really bad, so we might as well raise a not found exception, but
# in our best effort we'll return a device even if it's probably
# useless.
if not mpath:
LOG.warning('No dm was created, connection to volume is probably '
'bad and will perform poorly.')
return {'type': 'block', 'scsi_wwn': wwn,
'path': '/dev/' + found[0]}
return {'type': 'block', 'scsi_wwn': wwn, 'multipath_id': mpath,
'path': '/dev/' + mpath}
# find out the WWN of the device
device_wwn = self._linuxscsi.get_scsi_wwn(host_device)
LOG.debug("Device WWN = '%(wwn)s'", {'wwn': device_wwn})
device_info['scsi_wwn'] = device_wwn
if self.use_multipath:
(host_device, multipath_id) = (super(
ISCSIConnector, self)._discover_mpath_device(
device_wwn, connection_properties, host_device))
if multipath_id:
device_info['multipath_id'] = multipath_id
device_info['path'] = host_device
LOG.debug("connect_volume returning %s", device_info)
return device_info
def _get_connection_devices(self, connection_properties):
def _get_connection_devices(self, connection_properties,
ips_iqns_luns=None):
"""Get map of devices by sessions from our connection.
For each of the TCP sessions that correspond to our connection
@ -518,8 +636,12 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
We also include all nodes from our connection that don't have a
session.
If ips_iqns_luns parameter is provided connection_properties won't be
used to get them.
"""
ips_iqns_luns = self._get_all_targets(connection_properties)
if not ips_iqns_luns:
ips_iqns_luns = self._get_all_targets(connection_properties)
nodes = self._get_iscsi_nodes()
sessions = self._get_iscsi_sessions_full()
# Use (portal, iqn) to map the session value
@ -573,8 +695,31 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
the operation. Default is False.
:type ignore_errors: bool
"""
return self._cleanup_connection(connection_properties, force=force,
ignore_errors=ignore_errors)
def _cleanup_connection(self, connection_properties, ips_iqns_luns=None,
force=False, ignore_errors=False):
"""Cleans up connection flushing and removing devices and multipath.
:param connection_properties: The dictionary that describes all
of the target volume attributes.
:type connection_properties: dict that must include:
target_portal(s) - IP and optional port
target_iqn(s) - iSCSI Qualified Name
target_lun(s) - LUN id of the volume
:param ips_iqns_luns: Use this list of tuples instead of information
from the connection_properties.
:param force: Whether to forcefully disconnect even if flush fails.
:type force: bool
:param ignore_errors: When force is True, this will decide whether to
ignore errors or raise an exception once finished
the operation. Default is False.
:type ignore_errors: bool
"""
exc = exception.ExceptionChainer()
devices_map = self._get_connection_devices(connection_properties)
devices_map = self._get_connection_devices(connection_properties,
ips_iqns_luns)
# Remove devices and multipath from this connection
remove_devices = set()
@ -689,24 +834,24 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
return ips, iqns
def _connect_to_iscsi_portal(self, connection_properties):
"""Connect to an iSCSI portal-target an return the session id."""
portal = connection_properties['target_portal'].split(",")[0]
target_iqn = connection_properties['target_iqn']
# NOTE(vish): If we are on the same host as nova volume, the
# discovery makes the target so we don't need to
# run --op new. Therefore, we check to see if the
# target exists, and if we get 255 (Not Found), then
# we run --op new. This will also happen if another
# volume is using the same target.
LOG.info("Trying to connect to iSCSI portal %(portal)s",
{"portal": connection_properties['target_portal']})
try:
self._run_iscsiadm(connection_properties, ())
except putils.ProcessExecutionError as exc:
# iscsiadm returns 21 for "No records found" after version 2.0-871
if exc.exit_code in [21, 255]:
self._run_iscsiadm(connection_properties,
('--interface', self._get_transport(),
'--op', 'new'))
else:
raise
# iscsiadm returns 21 for "No records found" after version 2.0-871
LOG.info("Trying to connect to iSCSI portal %s", portal)
err = self._run_iscsiadm(connection_properties, (),
check_exit_code=(0, 21, 255))[1]
if err:
self._run_iscsiadm(connection_properties,
('--interface', self._get_transport(),
'--op', 'new'))
if connection_properties.get('auth_method'):
self._iscsiadm_update(connection_properties,
@ -719,44 +864,31 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
"node.session.auth.password",
connection_properties['auth_password'])
# Duplicate logins crash iscsiadm after load,
# so we scan active sessions to see if the node is logged in.
out = self._run_iscsiadm_bare(["-m", "session"],
run_as_root=True,
check_exit_code=[0, 1, 21])[0] or ""
# We exit once we are logged in or once we fail login
while True:
# Duplicate logins crash iscsiadm after load, so we scan active
# sessions to see if the node is logged in.
sessions = self._get_iscsi_sessions_full()
for s in sessions:
# Found our session, return session_id
if 'tcp:' == s[0] and portal == s[2] and s[4] == target_iqn:
return s[1]
portals = [{'portal': p.split(" ")[2], 'iqn': p.split(" ")[3]}
for p in out.splitlines() if p.startswith("tcp:")]
stripped_portal = connection_properties['target_portal'].split(",")[0]
if len(portals) == 0 or len([s for s in portals
if stripped_portal ==
s['portal'].split(",")[0]
and
s['iqn'] ==
connection_properties['target_iqn']]
) == 0:
try:
self._run_iscsiadm(connection_properties,
("--login",),
check_exit_code=[0, 255])
except putils.ProcessExecutionError as err:
# exit_code=15 means the session already exists, so it should
# be regarded as successful login.
if err.exit_code not in [15]:
LOG.warning('Failed to login iSCSI target %(iqn)s '
'on portal %(portal)s (exit code '
'%(err)s).',
{'iqn': connection_properties['target_iqn'],
'portal': connection_properties[
'target_portal'],
'err': err.exit_code})
return False
self._run_iscsiadm(connection_properties, ("--login",),
check_exit_code=(0, 15, 255))
except putils.ProcessExecutionError as err:
LOG.warning('Failed to login iSCSI target %(iqn)s on portal '
'%(portal)s (exit code %(err)s).',
{'iqn': target_iqn, 'portal': portal,
'err': err.exit_code})
return None
self._iscsiadm_update(connection_properties,
"node.startup",
"automatic")
return True
def _disconnect_from_iscsi_portal(self, connection_properties):
self._iscsiadm_update(connection_properties, "node.startup", "manual",
@ -808,72 +940,3 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector):
{'multipath_command': multipath_command,
'out': out, 'err': err})
return (out, err)
@utils.retry(exception.HostChannelsTargetsNotFound, backoff_rate=1.5)
def _get_hosts_channels_targets_luns(self, ips_iqns_luns):
iqns = {iqn: lun for ip, iqn, lun in ips_iqns_luns}
LOG.debug('Getting hosts, channels, and targets for iqns: %s',
iqns.keys())
# Get all targets indexed by scsi host path
targets_paths = glob.glob('/sys/class/scsi_host/host*/device/session*/'
'target*')
targets = collections.defaultdict(list)
for path in targets_paths:
target = path.split('/target')[1]
host = path.split('/device/')[0]
targets[host].append(target.split(':'))
# Get all scsi targets
sessions = glob.glob('/sys/class/scsi_host/host*/device/session*/'
'iscsi_session/session*/targetname')
result = []
for session in sessions:
# Read iSCSI target name
try:
with open(session, 'r') as f:
targetname = f.read().strip('\n')
except Exception:
continue
# If we are interested in it we store its target information
if targetname in iqns:
host = session.split('/device/')[0]
for __, channel, target_id in targets[host]:
result.append((host, channel, target_id, iqns[targetname]))
# Stop as soon as we have the info of all our iqns, even if
# there are more sessions to check
del iqns[targetname]
if not iqns:
break
# In some cases the login and udev triggers may not have been fast
# enough to create all sysfs entries, so we want to retry.
else:
raise exception.HostChannelsTargetsNotFound(iqns=iqns.keys(),
found=result)
return result
def _rescan_iscsi(self, ips_iqns_luns):
try:
hctls = self._get_hosts_channels_targets_luns(ips_iqns_luns)
except exception.HostChannelsTargetsNotFound as e:
if not e.found:
LOG.error('iSCSI scan failed: %s', e)
return
hctls = e.found
LOG.warning('iSCSI scan: %(error)s\nScanning %(hosts)s',
{'error': e, 'hosts': [h for h, c, t, l in hctls]})
for host_path, channel, target_id, target_lun in hctls:
LOG.debug('Scanning host %(host)s c: %(channel)s, '
't: %(target)s, l: %(lun)s)',
{'host': host_path, 'channel': channel,
'target': target_id, 'lun': target_lun})
self._linuxscsi.echo_scsi_command(
"%s/scan" % host_path,
"%(c)s %(t)s %(l)s" % {'c': channel,
't': target_id,
'l': target_lun})

View File

@ -39,6 +39,9 @@ MULTIPATH_DEVICE_ACTIONS = ['unchanged:', 'reject:', 'reload:',
class LinuxSCSI(executor.Executor):
# As found in drivers/scsi/scsi_lib.c
WWN_TYPES = {'t10.': '1', 'eui.': '2', 'naa.': '3'}
def echo_scsi_command(self, path, content):
"""Used to echo strings to scsi subsystem."""
@ -103,6 +106,41 @@ class LinuxSCSI(executor.Executor):
return dev_info
def get_sysfs_wwn(self, device_names):
"""Return the WWN used in the /dev/disk/by-id/scsi-* link of the devices.

The sysfs wwid (see ``get_sysfs_wwid``) is returned when a link named
after it exists.  Otherwise every ``scsi-*`` link is resolved and the
suffix of the first link that points to one of the devices is returned.

:param device_names: Iterable of device names without the ``/dev/``
prefix, ie: ['sda', 'sdb'].
:returns: The WWN string, or '' if no matching link was found.
"""
wwid = self.get_sysfs_wwid(device_names)
glob_str = '/dev/disk/by-id/scsi-'
wwn_paths = glob.glob(glob_str + '*')
# If we don't have multiple designators on page 0x83
if wwid and glob_str + wwid in wwn_paths:
return wwid
# If we have multiple designators follow the symlinks
for wwn_path in wwn_paths:
try:
# os.stat follows the link, so a dangling link raises OSError and is
# skipped by the handler below.
if os.path.islink(wwn_path) and os.stat(wwn_path):
path = os.path.realpath(wwn_path)
# path[5:] drops the leading '/dev/' to get the bare device name.
if path.startswith('/dev/') and path[5:] in device_names:
return wwn_path[len(glob_str):]
except OSError:
continue
return ''
def get_sysfs_wwid(self, device_names):
"""Return the wwid from sysfs in any of devices in udev format.

The value comes from the first device whose
``/sys/block/<device>/device/wwid`` file can be read.

:param device_names: Iterable of device names without the ``/dev/``
prefix.
:returns: The wwid with its 4 character type prefix replaced by the
numerical type from ``WWN_TYPES`` ('8' when the prefix is not in the
mapping), or '' if no wwid file could be read.
"""
for device_name in device_names:
try:
with open('/sys/block/%s/device/wwid' % device_name) as f:
wwid = f.read().strip()
except IOError:
# This device has no readable wwid, try the next one.
continue
# The sysfs wwid has the wwn type in string format as a prefix,
# but udev uses its numerical representation as returned by
# scsi_id's page 0x83, so we need to map it
udev_wwid = self.WWN_TYPES.get(wwid[:4], '8') + wwid[4:]
return udev_wwid
return ''
def get_scsi_wwn(self, path):
"""Read the WWN from page 0x83 value for a SCSI device."""
@ -186,10 +224,22 @@ class LinuxSCSI(executor.Executor):
# Wait until the symlinks are removed
with exc.context(force, 'Some devices remain from %s', devices_names):
self.wait_for_volumes_removal(devices_names)
try:
self.wait_for_volumes_removal(devices_names)
finally:
# Since we use /dev/disk/by-id/scsi- links to get the wwn we
# must ensure they are always removed.
self._remove_scsi_symlinks(devices_names)
return multipath_name
def _remove_scsi_symlinks(self, devices_names):
"""Unlink the /dev/disk/by-id/scsi-* links that resolve to our devices.

:param devices_names: Iterable of device names without the ``/dev/``
prefix.
"""
devices = ['/dev/' + dev for dev in devices_names]
links = glob.glob('/dev/disk/by-id/scsi-*')
# Keep only the links whose resolved target is one of our devices.
unlink = [link for link in links
if os.path.realpath(link) in devices]
if unlink:
# NOTE(review): no_errors=True presumably makes unlink failures
# non-fatal; confirm against priv_rootwrap.unlink_root.
priv_rootwrap.unlink_root(no_errors=True, *unlink)
def flush_device_io(self, device):
"""This is used to flush any remaining IO in the buffers."""
if os.path.exists(device):
@ -486,3 +536,82 @@ class LinuxSCSI(executor.Executor):
else:
return ("0x%04x%04x00000000" %
(lun_id & 0xffff, lun_id >> 16 & 0xffff))
def get_hctl(self, session, lun):
"""Given an iSCSI session return the host, channel, target, and lun.

:param session: A string with the session number.
:param lun: The lun, returned unchanged as the last tuple element.
:returns: Tuple of (host, channel, target, lun), where channel and
target are '-' when the session has no target directory yet, or None
when the session directory itself cannot be found.
"""
glob_str = '/sys/class/iscsi_host/host*/device/session' + session
paths = glob.glob(glob_str + '/target*')
if paths:
# The directory is named target<host>:<channel>:<target>
__, channel, target = os.path.split(paths[0])[1].split(':')
# Check if we can get the host
else:
target = channel = '-'
paths = glob.glob(glob_str)
if not paths:
LOG.debug('No hctl found on session %s with lun %s', session, lun)
return None
# Extract the host number from the path
# 26 is len('/sys/class/iscsi_host/host'), the number ends at the next '/'
host = paths[0][26:paths[0].index('/', 26)]
res = (host, channel, target, lun)
LOG.debug('HCTL %s found on session %s with lun %s', res, session, lun)
return res
def device_name_by_hctl(self, session, hctl):
    """Find the device name given a session and the hctl.

    :param session: A string with the session number
    :param hctl: An iterable with the host, channel, target, and lun as
                 passed to scan.  ie: ('5', '-', '-', '0')
    :returns: The name of the block device, ie: 'sda', or None if no
              device is found.
    """
    parts = hctl
    if '-' in parts:
        # '-' entries are turned into glob wildcards
        parts = [('*' if field == '-' else field) for field in parts]
    host, channel, target, lun = parts[0], parts[1], parts[2], parts[3]
    hct = '%s:%s:%s' % (host, channel, target)
    pattern = ('/sys/class/scsi_host/host%s/device/session%s/target%s/'
               '%s:%s/block/*' % (host, session, hct, hct, lun))
    found = glob.glob(pattern)
    # Take the lowest sorted entry so we don't return a partition
    device = os.path.basename(min(found)) if found else None
    LOG.debug('Searching for a device in session %s and hctl %s yield: %s',
              session, parts, device)
    return device
def scan_iscsi(self, host, channel='-', target='-', lun='-'):
    """Send an iSCSI scan request given the host and optionally the ctl.

    :param host: Host number, used to build the sysfs scan file path
    :param channel: Channel to scan, defaults to '-'
    :param target: Target to scan, defaults to '-'
    :param lun: Lun to scan, defaults to '-'
    """
    scan_args = {'host': host, 'channel': channel,
                 'target': target, 'lun': lun}
    LOG.debug('Scanning host %(host)s c: %(channel)s, '
              't: %(target)s, l: %(lun)s)', scan_args)
    scan_file = '/sys/class/scsi_host/host%s/scan' % host
    # The scan request is the space separated channel, target and lun
    request = '%s %s %s' % (channel, target, lun)
    self.echo_scsi_command(scan_file, request)
def multipath_add_wwid(self, wwid):
    """Add a wwid to the list of known multipath wwids.

    This has the effect of multipathd being willing to create a dm for a
    multipath even when there's only 1 device.

    :param wwid: A string with the wwid to add
    :returns: True if the command output reports the wwid as added, False
              otherwise.
    """
    command = ('multipath', '-a', wwid)
    # Exit code is not checked, success is determined from stdout alone
    stdout, __ = self._execute(*command,
                               run_as_root=True,
                               check_exit_code=False,
                               root_helper=self._root_helper)
    reported = stdout.strip()
    return reported == "wwid '" + wwid + "' added"
def multipath_add_path(self, realpath):
    """Add a path to multipathd for monitoring.

    This has the effect of multipathd checking an already checked device
    for multipath.

    Together with `multipath_add_wwid` we can create a multipath when
    there's only 1 path.

    :param realpath: Path of the device to add.  ie: '/dev/sda'
    :returns: True if the command output is 'ok', False otherwise.
    """
    # Exit code is not checked, success is determined from stdout alone
    options = {'run_as_root': True,
               'timeout': 5,
               'check_exit_code': False,
               'root_helper': self._root_helper}
    reply, __ = self._execute('multipathd', 'add', 'path', realpath,
                              **options)
    return reply.strip() == 'ok'

View File

@ -36,6 +36,7 @@ the urgency of (1)), then work on the larger refactor that addresses
"""
import os
import signal
import six
import threading
@ -191,3 +192,29 @@ def execute(*cmd, **kwargs):
def execute_root(*cmd, **kwargs):
    """Run cmd via custom_execute with shell and run_as_root disabled.

    NB: Raises processutils.ProcessExecutionError/OSError on failure.
    """
    result = custom_execute(*cmd, shell=False, run_as_root=False, **kwargs)
    return result
@privileged.default.entrypoint
def unlink_root(*links, **kwargs):
    """Unlink system links with sys admin privileges.

    By default it will raise an exception if a link does not exist and stop
    unlinking remaining links.

    This behavior can be modified passing optional parameters `no_errors` and
    `raise_at_end`.  When both are set `no_errors` wins and nothing is raised.

    :param no_errors: Don't raise an exception on error
    :param raise_at_end: Don't raise an exception on first error, try to
                         unlink all links and then raise an ExceptionChainer
                         with all the errors that were found.
    """
    suppress_all = kwargs.get('no_errors', False)
    defer_raise = kwargs.get('raise_at_end', False)
    # Either option requires that a failure doesn't stop the loop
    keep_going = suppress_all or defer_raise
    errors = exception.ExceptionChainer()

    for path in links:
        with errors.context(keep_going, 'Unlink failed for %s', path):
            os.unlink(path)

    if suppress_all or not defer_raise:
        return
    if errors:
        raise errors

File diff suppressed because it is too large Load Diff

View File

@ -217,6 +217,7 @@ class LinuxSCSITestCase(base.TestCase):
@ddt.data({'do_raise': False, 'force': False},
{'do_raise': True, 'force': True})
@ddt.unpack
@mock.patch.object(linuxscsi.LinuxSCSI, '_remove_scsi_symlinks')
@mock.patch.object(linuxscsi.LinuxSCSI, 'flush_multipath_device')
@mock.patch.object(linuxscsi.LinuxSCSI, 'get_dm_name')
@mock.patch.object(linuxscsi.LinuxSCSI, 'find_sysfs_multipath_dm')
@ -226,6 +227,7 @@ class LinuxSCSITestCase(base.TestCase):
find_dm_mock,
get_dm_name_mock,
flush_mp_mock,
remove_link_mock,
do_raise, force):
if do_raise:
flush_mp_mock.side_effect = Exception
@ -245,7 +247,9 @@ class LinuxSCSITestCase(base.TestCase):
mock.call('/dev/sdb', mock.sentinel.Force, exc)])
wait_mock.assert_called_once_with(devices_names)
self.assertEqual(do_raise, bool(exc))
remove_link_mock.assert_called_once_with(devices_names)
@mock.patch.object(linuxscsi.LinuxSCSI, '_remove_scsi_symlinks')
@mock.patch.object(linuxscsi.LinuxSCSI, 'flush_multipath_device',
side_effect=Exception)
@mock.patch.object(linuxscsi.LinuxSCSI, 'get_dm_name')
@ -254,7 +258,7 @@ class LinuxSCSITestCase(base.TestCase):
@mock.patch.object(linuxscsi.LinuxSCSI, 'remove_scsi_device')
def test_remove_connection_multipath_fail(self, remove_mock, wait_mock,
find_dm_mock, get_dm_name_mock,
flush_mp_mock):
flush_mp_mock, remove_link_mock):
flush_mp_mock.side_effect = exception.ExceptionChainer
devices_names = ('sda', 'sdb')
exc = exception.ExceptionChainer()
@ -267,11 +271,14 @@ class LinuxSCSITestCase(base.TestCase):
flush_mp_mock.assert_called_once_with(get_dm_name_mock.return_value)
remove_mock.assert_not_called()
wait_mock.assert_not_called()
remove_link_mock.assert_not_called()
self.assertTrue(bool(exc))
@mock.patch.object(linuxscsi.LinuxSCSI, '_remove_scsi_symlinks')
@mock.patch.object(linuxscsi.LinuxSCSI, 'wait_for_volumes_removal')
@mock.patch.object(linuxscsi.LinuxSCSI, 'remove_scsi_device')
def test_remove_connection_singlepath(self, remove_mock, wait_mock):
def test_remove_connection_singlepath(self, remove_mock, wait_mock,
remove_link_mock):
devices_names = ('sda', 'sdb')
exc = exception.ExceptionChainer()
self.linuxscsi.remove_connection(devices_names, is_multipath=False,
@ -281,6 +288,7 @@ class LinuxSCSITestCase(base.TestCase):
[mock.call('/dev/sda', mock.sentinel.Force, exc),
mock.call('/dev/sdb', mock.sentinel.Force, exc)])
wait_mock.assert_called_once_with(devices_names)
remove_link_mock.assert_called_once_with(devices_names)
def test_find_multipath_device_3par_ufn(self):
def fake_execute(*cmd, **kwargs):
@ -759,3 +767,177 @@ loop0 0"""
False, None, mock_rootwrap.execute))
mock_rootwrap.execute.assert_called_once_with(
'multipathd', 'show', 'status', run_as_root=True, root_helper=None)
@mock.patch('glob.glob')
@mock.patch.object(linuxscsi.LinuxSCSI, 'get_sysfs_wwid')
def test_get_sysfs_wwn_single_designator(self, get_wwid_mock, glob_mock):
    # One of the by-id links is named after the wwid reported by sysfs
    devices = mock.sentinel.device_names
    sysfs_wwid = 'wwid1'
    get_wwid_mock.return_value = sysfs_wwid
    glob_mock.return_value = ['/dev/disk/by-id/scsi-wwid1',
                              '/dev/disk/by-id/scsi-wwid2']

    found = self.linuxscsi.get_sysfs_wwn(devices)

    self.assertEqual(sysfs_wwid, found)
    glob_mock.assert_called_once_with('/dev/disk/by-id/scsi-*')
    get_wwid_mock.assert_called_once_with(devices)
# The side_effect sequences below are consumed positionally by the calls made
# inside get_sysfs_wwn, so their order is tied to that implementation.
# NOTE(review): presumably each glob'ed link is checked with islink, then
# stat, then realpath -- confirm against get_sysfs_wwn.
@mock.patch('os.path.realpath', side_effect=('/other/path',
'/dev/sda', '/dev/sdb'))
@mock.patch('os.path.islink', side_effect=(False, True, True, True, True))
@mock.patch('os.stat', side_effect=(False, True, True, True))
@mock.patch('glob.glob')
@mock.patch.object(linuxscsi.LinuxSCSI, 'get_sysfs_wwid')
def test_get_sysfs_wwn_multiple_designators(self, get_wwid_mock, glob_mock,
stat_mock, islink_mock,
realpath_mock):
# Five by-id links are found, the link names describe the case each one
# is meant to exercise.
glob_mock.return_value = ['/dev/disk/by-id/scsi-fail-link',
'/dev/disk/by-id/scsi-fail-stat',
'/dev/disk/by-id/scsi-non-dev',
'/dev/disk/by-id/scsi-wwid1',
'/dev/disk/by-id/scsi-wwid2']
# The sysfs wwid doesn't match the name of any of the links
get_wwid_mock.return_value = 'pre-wwid'
devices = ['sdb', 'sdc']
res = self.linuxscsi.get_sysfs_wwn(devices)
# The last realpath result, '/dev/sdb', is the only one for a requested
# device, and the wwid is expected to come from the last link's name.
self.assertEqual('wwid2', res)
glob_mock.assert_called_once_with('/dev/disk/by-id/scsi-*')
get_wwid_mock.assert_called_once_with(devices)
@mock.patch('os.path.realpath', side_effect=('/dev/sda', '/dev/sdb'))
@mock.patch('os.path.islink', return_value=True)
@mock.patch('os.stat', return_value=True)
@mock.patch('glob.glob')
@mock.patch.object(linuxscsi.LinuxSCSI, 'get_sysfs_wwid')
def test_get_sysfs_wwn_not_found(self, get_wwid_mock, glob_mock, stat_mock,
                                 islink_mock, realpath_mock):
    # Neither the link names nor the devices they resolve to match
    device_names = ['sdc']
    get_wwid_mock.return_value = 'pre-wwid'
    glob_mock.return_value = ['/dev/disk/by-id/scsi-wwid1',
                              '/dev/disk/by-id/scsi-wwid2']

    found = self.linuxscsi.get_sysfs_wwn(device_names)

    self.assertEqual('', found)
    glob_mock.assert_called_once_with('/dev/disk/by-id/scsi-*')
    get_wwid_mock.assert_called_once_with(device_names)
# Runs once per wwn type: the textual prefix read from the wwid file is
# expected to be replaced by the numeric value given in num_val.
@ddt.data({'wwn_type': 't10.', 'num_val': '1'},
{'wwn_type': 'eui.', 'num_val': '2'},
{'wwn_type': 'naa.', 'num_val': '3'})
@ddt.unpack
@mock.patch('six.moves.builtins.open')
def test_get_sysfs_wwid(self, open_mock, wwn_type, num_val):
# File that opens fine but raises IOError when read
read_fail = mock.MagicMock()
read_fail.__enter__.return_value.read.side_effect = IOError
# File that returns the wwid terminated by a newline
read_data = mock.MagicMock()
read_data.__enter__.return_value.read.return_value = (wwn_type +
'wwid1\n')
# sda fails on open, sdb fails on read, and sdc provides the wwid
open_mock.side_effect = (IOError, read_fail, read_data)
res = self.linuxscsi.get_sysfs_wwid(['sda', 'sdb', 'sdc'])
# Result has the numeric prefix and no trailing newline
self.assertEqual(num_val + 'wwid1', res)
open_mock.assert_has_calls([mock.call('/sys/block/sda/device/wwid'),
mock.call('/sys/block/sdb/device/wwid'),
mock.call('/sys/block/sdc/device/wwid')])
@mock.patch('six.moves.builtins.open', side_effect=IOError)
def test_get_sysfs_wwid_not_found(self, open_mock):
    # Every wwid file fails to open, so an empty string is expected
    names = ('sda', 'sdb')
    wwid = self.linuxscsi.get_sysfs_wwid(list(names))
    self.assertEqual('', wwid)
    expected_opens = [mock.call('/sys/block/%s/device/wwid' % name)
                      for name in names]
    open_mock.assert_has_calls(expected_opens)
@mock.patch.object(linuxscsi.priv_rootwrap, 'unlink_root')
@mock.patch('glob.glob')
@mock.patch('os.path.realpath', side_effect=['/dev/sda', '/dev/sdb',
                                             '/dev/sdc'])
def test_remove_scsi_symlinks(self, realpath_mock, glob_mock, unlink_mock):
    links = ['/dev/disk/by-id/scsi-wwid%d' % num for num in (1, 2, 3)]
    glob_mock.return_value = links

    self.linuxscsi._remove_scsi_symlinks(['sdb', 'sdc', 'sdd'])

    glob_mock.assert_called_once_with('/dev/disk/by-id/scsi-*')
    realpath_mock.assert_has_calls([mock.call(link) for link in links])
    # The first link resolves to sda, which is not one of our devices
    expected_links = links[1:]
    unlink_mock.assert_called_once_with(*expected_links, no_errors=True)
@mock.patch.object(linuxscsi.priv_rootwrap, 'unlink_root')
@mock.patch('glob.glob')
@mock.patch('os.path.realpath', side_effect=['/dev/sda', '/dev/sdb'])
def test_remove_scsi_symlinks_no_links(self, realpath_mock, glob_mock,
                                       unlink_mock):
    paths = ['/dev/disk/by-id/scsi-wwid1', '/dev/disk/by-id/scsi-wwid2']
    glob_mock.return_value = paths
    # Device names go without the '/dev/' prefix, as the method adds it.
    # Passing full paths would make the test pass even if links matched.
    self.linuxscsi._remove_scsi_symlinks(['sdd', 'sde'])
    glob_mock.assert_called_once_with('/dev/disk/by-id/scsi-*')
    realpath_mock.assert_has_calls([mock.call(g) for g in paths])
    # Links resolve to sda and sdb, so there is nothing to unlink
    unlink_mock.assert_not_called()
@mock.patch('glob.glob')
def test_get_hctl_with_target(self, glob_mock):
    # Two targets are found on the session, the first one is used
    session_dir = '/sys/class/iscsi_host/host3/device/session1'
    glob_mock.return_value = [session_dir + '/target3:4:5',
                              session_dir + '/target3:4:6']

    hctl = self.linuxscsi.get_hctl('1', '2')

    self.assertEqual(('3', '4', '5', '2'), hctl)
    glob_mock.assert_called_once_with(
        '/sys/class/iscsi_host/host*/device/session1/target*')
@mock.patch('glob.glob')
def test_get_hctl_no_target(self, glob_mock):
    session_glob = '/sys/class/iscsi_host/host*/device/session1'
    session_dir = '/sys/class/iscsi_host/host3/device/session1'
    # First glob (targets) finds nothing, second one finds the session
    glob_mock.side_effect = [[], [session_dir, session_dir]]

    hctl = self.linuxscsi.get_hctl('1', '2')

    # Channel and target are unknown, so they are reported as '-'
    self.assertEqual(('3', '-', '-', '2'), hctl)
    glob_mock.assert_has_calls([mock.call(session_glob + '/target*'),
                                mock.call(session_glob)])
@mock.patch('glob.glob', return_value=[])
def test_get_hctl_no_paths(self, glob_mock):
    # Neither the target nor the session globs find anything
    session_glob = '/sys/class/iscsi_host/host*/device/session1'

    hctl = self.linuxscsi.get_hctl('1', '2')

    self.assertIsNone(hctl)
    glob_mock.assert_has_calls([mock.call(session_glob + '/target*'),
                                mock.call(session_glob)])
@mock.patch('glob.glob')
def test_device_name_by_hctl(self, glob_mock):
    block_dir = ('/sys/class/scsi_host/host3/device/session1/target3:4:5/'
                 '3:4:5:2/block/')
    # The partition is listed first, but the disk must be returned
    glob_mock.return_value = [block_dir + 'sda2', block_dir + 'sda']

    name = self.linuxscsi.device_name_by_hctl('1', ('3', '4', '5', '2'))

    self.assertEqual('sda', name)
    glob_mock.assert_called_once_with(block_dir + '*')
@mock.patch('glob.glob')
def test_device_name_by_hctl_wildcards(self, glob_mock):
    block_dir = ('/sys/class/scsi_host/host3/device/session1/target3:4:5/'
                 '3:4:5:2/block/')
    glob_mock.return_value = [block_dir + 'sda2', block_dir + 'sda']

    name = self.linuxscsi.device_name_by_hctl('1', ('3', '-', '-', '2'))

    self.assertEqual('sda', name)
    # The '-' channel and target must have become '*' in the glob
    glob_mock.assert_called_once_with(
        '/sys/class/scsi_host/host3/device/session1/target3:*:*/3:*:*:2/'
        'block/*')
@mock.patch('glob.glob', mock.Mock(return_value=[]))
def test_device_name_by_hctl_no_devices(self):
    # The glob finds no block device for the given hctl
    hctl = ('4', '5', '6', '2')
    name = self.linuxscsi.device_name_by_hctl('1', hctl)
    self.assertIsNone(name)
@mock.patch.object(linuxscsi.LinuxSCSI, 'echo_scsi_command')
def test_scsi_iscsi(self, echo_mock):
    # Literal words are used so each argument's position can be told apart
    hctl = ('host', 'channel', 'target', 'lun')
    self.linuxscsi.scan_iscsi(*hctl)
    scan_file = '/sys/class/scsi_host/hosthost/scan'
    echo_mock.assert_called_once_with(scan_file, 'channel target lun')
def test_multipath_add_wwid(self):
    # Only the executed command is checked, not the returned value
    expected_cmds = ['multipath -a wwid1']
    self.linuxscsi.multipath_add_wwid('wwid1')
    self.assertEqual(expected_cmds, self.cmds)
def test_multipath_add_path(self):
    # Only the executed command is checked, not the returned value
    expected_cmds = ['multipathd add path /dev/sda']
    self.linuxscsi.multipath_add_path('/dev/sda')
    self.assertEqual(expected_cmds, self.cmds)

View File

@ -125,3 +125,33 @@ class PrivRootwrapTestCase(base.TestCase):
check_exit_code=False)
self.assertEqual('', out)
self.assertIsInstance(err, six.string_types)
@mock.patch.object(priv_rootwrap.unlink_root.privsep_entrypoint,
                   'client_mode', False)
@mock.patch('os.unlink', side_effect=IOError)
def test_unlink_root(self, unlink_mock):
    # With no_errors all links are attempted and no failure is raised
    links = ['/dev/disk/by-id/link%d' % num for num in (1, 2)]
    priv_rootwrap.unlink_root(*links, no_errors=True)
    unlink_mock.assert_has_calls([mock.call(link) for link in links])
@mock.patch.object(priv_rootwrap.unlink_root.privsep_entrypoint,
                   'client_mode', False)
@mock.patch('os.unlink', side_effect=IOError)
def test_unlink_root_raise(self, unlink_mock):
    first_link = '/dev/disk/by-id/link1'
    second_link = '/dev/disk/by-id/link2'
    with self.assertRaises(IOError):
        priv_rootwrap.unlink_root(first_link, second_link, no_errors=False)
    # The first failure is raised, so the second link is never tried
    unlink_mock.assert_called_once_with(first_link)
@mock.patch.object(priv_rootwrap.unlink_root.privsep_entrypoint,
                   'client_mode', False)
@mock.patch('os.unlink', side_effect=IOError)
def test_unlink_root_raise_at_end(self, unlink_mock):
    links = ['/dev/disk/by-id/link%d' % num for num in (1, 2)]
    # All links are attempted before the chained errors are raised
    with self.assertRaises(exception.ExceptionChainer):
        priv_rootwrap.unlink_root(*links, raise_at_end=True)
    unlink_mock.assert_has_calls([mock.call(link) for link in links])

View File

@ -0,0 +1,5 @@
---
fixes:
- |
Refactored the iSCSI connect mechanism to be faster, more robust, and
more reliable.