Check paths before extending device

In a multipath enabled deployment, when we try to extend a volume
and some paths are down, we fail to extend the multipath device and
leave the environment in an inconsistent state. See LP Bug #2032177
for more details.

To handle this, we check if all the paths are up before trying to
extend the device and fail fast if any path is down. This ensures
we don't partially extend some paths and leave the other to the
original size leading to inconsistent state in the environment.

Closes-Bug: 2032177

Change-Id: I5fc02efc5e9657821a1335f1c1ac5fe036e9329a
This commit is contained in:
Rajat Dhasmana 2023-07-31 09:14:59 +00:00 committed by whoami-rajat
parent eb76f8283e
commit 1435059da5
6 changed files with 95 additions and 21 deletions

View File

@ -22,7 +22,6 @@ import typing
from typing import Optional
from oslo_concurrency import lockutils
from oslo_concurrency import processutils as putils
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import reflection
@ -33,6 +32,7 @@ from os_brick import initiator
from os_brick.initiator import host_driver
from os_brick.initiator import initiator_connector
from os_brick.initiator import linuxscsi
from os_brick import utils
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
@ -124,21 +124,7 @@ class BaseLinuxConnector(initiator_connector.InitiatorConnector):
return props
def check_valid_device(self, path: str, run_as_root: bool = True) -> bool:
cmd = ('dd', 'if=%(path)s' % {"path": path},
'of=/dev/null', 'count=1')
out, info = None, None
try:
out, info = self._execute(*cmd, run_as_root=run_as_root,
root_helper=self._root_helper)
except putils.ProcessExecutionError as e:
LOG.error("Failed to access the device on the path "
"%(path)s: %(error)s.",
{"path": path, "error": e.stderr})
return False
# If the info is none, the path does not exist.
if info is None:
return False
return True
return utils.check_valid_device(self, path)
def get_all_available_volumes(
self,

View File

@ -679,6 +679,15 @@ class LinuxSCSI(executor.Executor):
This function tries to signal the local system's kernel
that an already attached volume might have been resized.
"""
# We need all paths up before extending the devices.
# see Launchpad Bug: #2032177 for more details.
LOG.debug("Checking paths are valid %s", volume_paths)
for volume_path in volume_paths:
if not utils.check_valid_device(self, volume_path):
LOG.error("Path status is down for path %s", volume_path)
raise exception.BrickException("All paths need to be up "
"to extend the device.")
LOG.debug("extend volume %s", volume_paths)
for volume_path in volume_paths:

View File

@ -770,6 +770,7 @@ loop0 0"""
expected_commands = ['multipathd resize map %s' % dm_path]
self.assertEqual(expected_commands, self.cmds)
@mock.patch('os_brick.utils.check_valid_device')
@mock.patch.object(linuxscsi.LinuxSCSI, 'find_multipath_device_path')
@mock.patch.object(linuxscsi.LinuxSCSI, 'get_scsi_wwn')
@mock.patch('os_brick.utils.get_device_size')
@ -777,7 +778,8 @@ loop0 0"""
def test_extend_volume_no_mpath(self, mock_device_info,
mock_device_size,
mock_scsi_wwn,
mock_find_mpath_path):
mock_find_mpath_path,
mock_valid_dev):
"""Test extending a volume where there is no multipath device."""
fake_device = {'host': '0',
'channel': '0',
@ -792,6 +794,7 @@ loop0 0"""
wwn = '1234567890123456'
mock_scsi_wwn.return_value = wwn
mock_find_mpath_path.return_value = None
mock_valid_dev.return_value = True
ret_size = self.linuxscsi.extend_volume(['/dev/fake'])
self.assertEqual(second_size, ret_size)
@ -800,6 +803,7 @@ loop0 0"""
expected_cmds = ['tee -a /sys/bus/scsi/drivers/sd/0:0:0:1/rescan']
self.assertEqual(expected_cmds, self.cmds)
@mock.patch('os_brick.utils.check_valid_device')
@mock.patch.object(linuxscsi.LinuxSCSI, 'find_multipath_device_path')
@mock.patch.object(linuxscsi.LinuxSCSI, 'get_scsi_wwn')
@mock.patch('os_brick.utils.get_device_size')
@ -809,7 +813,8 @@ loop0 0"""
mock_device_info,
mock_device_size,
mock_scsi_wwn,
mock_find_mpath_path):
mock_find_mpath_path,
mock_valid_dev):
"""Test extending a volume where there is a multipath device."""
mock_device_info.side_effect = [{'host': host,
'channel': '0',
@ -823,6 +828,7 @@ loop0 0"""
mock_find_mpath_path.return_value = mpath_path
dm_path = '/dev/dm-5'
mock_realpath.return_value = dm_path
mock_valid_dev.return_value = True
ret_size = self.linuxscsi.extend_volume(['/dev/fake1', '/dev/fake2'],
use_multipath=True)
@ -836,6 +842,7 @@ loop0 0"""
self.assertEqual(expected_cmds, self.cmds)
mock_realpath.assert_called_once_with(mpath_path)
@mock.patch('os_brick.utils.check_valid_device')
@mock.patch.object(linuxscsi.LinuxSCSI, '_multipath_resize_map')
@mock.patch.object(linuxscsi.LinuxSCSI, 'find_multipath_device_path')
@mock.patch.object(linuxscsi.LinuxSCSI, 'get_scsi_wwn')
@ -845,7 +852,8 @@ loop0 0"""
mock_device_size,
mock_scsi_wwn,
mock_find_mpath_path,
mock_mpath_resize_map):
mock_mpath_resize_map,
mock_valid_dev):
"""Test extending a volume where there is a multipath device fail."""
mock_device_info.side_effect = [{'host': host,
'channel': '0',
@ -860,6 +868,7 @@ loop0 0"""
mock_mpath_resize_map.side_effect = putils.ProcessExecutionError(
stdout="fail")
mock_valid_dev.return_value = True
self.assertRaises(
putils.ProcessExecutionError,
@ -873,6 +882,7 @@ loop0 0"""
'multipathd reconfigure']
self.assertEqual(expected_cmds, self.cmds)
@mock.patch('os_brick.utils.check_valid_device')
@mock.patch('time.sleep')
@mock.patch.object(linuxscsi.LinuxSCSI, '_multipath_resize_map')
@mock.patch.object(linuxscsi.LinuxSCSI, 'find_multipath_device_path')
@ -886,7 +896,8 @@ loop0 0"""
mock_scsi_wwn,
mock_find_mpath_path,
mock_mpath_resize_map,
mock_sleep):
mock_sleep,
mock_valid_dev):
"""Test extending a volume where there is a multipath device fail."""
mock_device_info.side_effect = [{'host': host,
'channel': '0',
@ -903,6 +914,7 @@ loop0 0"""
mock_mpath_resize_map.side_effect = (
putils.ProcessExecutionError(stdout="timeout"),
"success")
mock_valid_dev.return_value = True
ret_size = self.linuxscsi.extend_volume(['/dev/fake1', '/dev/fake2'],
use_multipath=True)
@ -916,6 +928,7 @@ loop0 0"""
mock_mpath_resize_map.assert_has_calls([mock.call(dm_path)] * 2)
mock_realpath.assert_called_once_with(mpath_path)
@mock.patch('os_brick.utils.check_valid_device')
@mock.patch('time.sleep')
@mock.patch('time.time')
@mock.patch.object(linuxscsi.LinuxSCSI, '_multipath_resize_map')
@ -929,7 +942,8 @@ loop0 0"""
mock_find_mpath_path,
mock_mpath_resize_map,
mock_currtime,
mock_sleep):
mock_sleep,
mock_valid_dev):
"""Test extending a volume where there is a multipath device fail."""
mock_device_info.side_effect = [{'host': host,
'channel': '0',
@ -941,6 +955,7 @@ loop0 0"""
mock_scsi_wwn.return_value = wwn
mock_find_mpath_path.return_value = ('/dev/mapper/dm-uuid-mpath-%s' %
wwn)
mock_valid_dev.return_value = True
# time.time is used to check if our own timeout has been exceeded,
# which is why it has to be mocked.
@ -963,6 +978,20 @@ loop0 0"""
['/dev/fake1', '/dev/fake2'],
use_multipath=True)
@mock.patch('os_brick.utils.check_valid_device')
def test_extend_volume_with_mpath_path_down(self, mock_valid_dev):
"""Test extending a volume where there is a path down."""
mock_valid_dev.return_value = False
dev1 = '/dev/fake1'
dev2 = '/dev/fake2'
self.assertRaises(
exception.BrickException,
self.linuxscsi.extend_volume,
[dev1, dev2],
use_multipath=True)
mock_valid_dev.assert_called_once_with(self.linuxscsi, dev1)
def test_process_lun_id_list(self):
lun_list = [2, 255, 88, 370, 5, 256]
result = self.linuxscsi.process_lun_id(lun_list)

View File

@ -83,6 +83,31 @@ class TestUtils(base.TestCase):
mock_create.assert_not_called()
self.assertIsNone(result)
@ddt.data(('fake_info', True), (None, False))
@ddt.unpack
def test_check_valid_device(self, fake_info, expected):
mock_execute = mock.Mock()
mock_execute._execute.return_value = ('fake_out', fake_info)
fake_path = '/dev/fake'
is_valid = utils.check_valid_device(mock_execute, fake_path)
self.assertEqual(expected, is_valid)
mock_execute._execute.assert_called_once_with(
'dd', 'if=/dev/fake', 'of=/dev/null', 'count=1',
run_as_root=True, root_helper=mock_execute._root_helper)
def test_check_valid_device_error(self):
mock_execute = mock.Mock()
p_exception = utils.processutils.ProcessExecutionError
mock_execute._execute.side_effect = p_exception
fake_path = '/dev/fake'
is_valid = utils.check_valid_device(mock_execute, fake_path)
self.assertEqual(False, is_valid)
mock_execute._execute.assert_called_once_with(
'dd', 'if=/dev/fake', 'of=/dev/null', 'count=1',
run_as_root=True, root_helper=mock_execute._root_helper)
class TestRetryDecorator(base.TestCase):

View File

@ -431,6 +431,22 @@ def get_device_size(executor: executor.Executor, device: str) -> Optional[int]:
return None
def check_valid_device(executor: executor.Executor, path: str) -> bool:
cmd = ('dd', 'if=%(path)s' % {"path": path},
'of=/dev/null', 'count=1')
out, info = None, None
try:
out, info = executor._execute(*cmd, run_as_root=True,
root_helper=executor._root_helper)
except processutils.ProcessExecutionError as e:
LOG.error("Failed to access the device on the path "
"%(path)s: %(error)s.",
{"path": path, "error": e.stderr})
return False
# If the info is none, the path does not exist.
return info is not None
class Anything(object):
"""Object equal to everything."""
def __eq__(self, other):

View File

@ -0,0 +1,9 @@
---
fixes:
- |
`Bug #2032177 <https://bugs.launchpad.net/os-brick/+bug/2032177>`_:
Handled the issue when one or more paths of a multipath device are down and
we try to extend the device. This is addressed by adding an additional check
to verify that all devices are up before extending the multipath device to
avoid leaving devices with inconsistent sizes.