From fab367cc265593ac7b46feca8cb13da1385a3d03 Mon Sep 17 00:00:00 2001 From: Hamdy Khader Date: Thu, 13 Sep 2018 14:38:11 +0300 Subject: [PATCH] Retry executing command "nvme list" when fail Only one NVMe connection can be active on the same Compute node, when initiator side has more than one connection then new connection is made, results of the new connected device is not instantaneous. Fixing this issue requires retry and sleep when executing command "nvme list" to retrieve the newly connected device. Change-Id: I6b70140be7023770b83603de723d6d2de3ebb747 Closes-Bug: #1792313 --- os_brick/initiator/connectors/nvme.py | 55 ++++++++++--------- .../tests/initiator/connectors/test_nvme.py | 17 ++++-- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/os_brick/initiator/connectors/nvme.py b/os_brick/initiator/connectors/nvme.py index 11d925ca9..83951c6c1 100644 --- a/os_brick/initiator/connectors/nvme.py +++ b/os_brick/initiator/connectors/nvme.py @@ -18,12 +18,12 @@ from oslo_concurrency import processutils as putils from oslo_log import log as logging from os_brick import exception -from os_brick import initiator +from os_brick.i18n import _ from os_brick.initiator.connectors import base from os_brick import utils -DEVICE_SCAN_ATTEMPTS_DEFAULT = 3 +DEVICE_SCAN_ATTEMPTS_DEFAULT = 5 LOG = logging.getLogger(__name__) @@ -35,7 +35,7 @@ class NVMeConnector(base.BaseLinuxConnector): """Connector class to attach/detach NVMe over fabric volumes.""" def __init__(self, root_helper, driver=None, - device_scan_attempts=initiator.DEVICE_SCAN_ATTEMPTS_DEFAULT, + device_scan_attempts=DEVICE_SCAN_ATTEMPTS_DEFAULT, *args, **kwargs): super(NVMeConnector, self).__init__( root_helper, @@ -60,21 +60,28 @@ class NVMeConnector(base.BaseLinuxConnector): nvme_devices = [] pattern = r'/dev/nvme[0-9]n[0-9]' cmd = ['nvme', 'list'] - try: - (out, err) = self._execute(*cmd, root_helper=self._root_helper, - run_as_root=True) - for line in out.split('\n'): - result = re.match(pattern, line) - if result: - nvme_devices.append(result.group(0)) - LOG.debug("_get_nvme_devices returned %(nvme_devices)s", - {'nvme_devices': nvme_devices}) - return nvme_devices + for retry in range(1, self.device_scan_attempts + 1): + try: + (out, err) = self._execute(*cmd, + root_helper=self._root_helper, + run_as_root=True) + for line in out.split('\n'): + result = re.match(pattern, line) + if result: + nvme_devices.append(result.group(0)) + LOG.debug("_get_nvme_devices returned %(nvme_devices)s", + {'nvme_devices': nvme_devices}) + return nvme_devices - except putils.ProcessExecutionError: - LOG.error( - "Failed to list available NVMe connected controllers.") - raise + except putils.ProcessExecutionError: + LOG.warning( + "Failed to list available NVMe connected controllers, " + "retrying.") + time.sleep(retry ** 2) + else: + msg = _("Failed to retrieve available connected NVMe controllers " + "when running nvme list.") + raise exception.CommandExecutionFailed(message=msg) @utils.trace @synchronized('connect_volume') @@ -113,18 +120,12 @@ class NVMeConnector(base.BaseLinuxConnector): "%(conn_nqn)s", {'conn_nqn': conn_nqn}) raise - for retry in range(1, self.device_scan_attempts + 1): - all_nvme_devices = self._get_nvme_devices() - path = set(all_nvme_devices) - set(current_nvme_devices) - if path: - break - time.sleep(retry ** 2) - else: - LOG.error("Failed to retrieve available connected NVMe " - "controllers when running nvme list") + all_nvme_devices = self._get_nvme_devices() + path = set(all_nvme_devices) - set(current_nvme_devices) + path = list(path) + if not path: raise exception.TargetPortalNotFound(target_portal) - path = list(path) LOG.debug("all_nvme_devices are %(all_nvme_devices)s", {'all_nvme_devices': all_nvme_devices}) device_info['path'] = path[0] diff --git a/os_brick/tests/initiator/connectors/test_nvme.py b/os_brick/tests/initiator/connectors/test_nvme.py index 2e0093644..93d942917 100644 --- a/os_brick/tests/initiator/connectors/test_nvme.py +++ b/os_brick/tests/initiator/connectors/test_nvme.py @@ -48,9 +48,10 @@ class NVMeConnectorTestCase(test_connector.ConnectorTestCase): self.assertEqual(expected, actual) @mock.patch.object(nvme.NVMeConnector, '_execute') - def test_get_nvme_devices_raise(self, mock_execute): + @mock.patch('time.sleep') + def test_get_nvme_devices_raise(self, mock_sleep, mock_execute): mock_execute.side_effect = putils.ProcessExecutionError - self.assertRaises(putils.ProcessExecutionError, + self.assertRaises(exception.CommandExecutionFailed, self.connector._get_nvme_devices) @mock.patch.object(nvme.NVMeConnector, '_get_nvme_devices') @@ -83,14 +84,15 @@ class NVMeConnectorTestCase(test_connector.ConnectorTestCase): run_as_root=True) @mock.patch.object(nvme.NVMeConnector, '_execute') - def test_connect_volume_raise(self, mock_execute): + @mock.patch('time.sleep') + def test_connect_volume_raise(self, mock_sleep, mock_execute): connection_properties = {'target_portal': 'portal', 'target_port': 1, 'nqn': 'nqn.volume_123', 'device_path': '', 'transport_type': 'rdma'} mock_execute.side_effect = putils.ProcessExecutionError - self.assertRaises(putils.ProcessExecutionError, + self.assertRaises(exception.CommandExecutionFailed, self.connector.connect_volume, connection_properties) @@ -113,7 +115,8 @@ class NVMeConnectorTestCase(test_connector.ConnectorTestCase): @mock.patch.object(nvme.NVMeConnector, '_get_nvme_devices') @mock.patch.object(nvme.NVMeConnector, '_execute') - def test_disconnect_volume(self, mock_devices, mock_execute): + @mock.patch('time.sleep') + def test_disconnect_volume(self, mock_sleep, mock_execute, mock_devices): connection_properties = {'target_portal': 'portal', 'target_port': 1, 'nqn': 'nqn.volume_123', @@ -130,7 +133,9 @@ class NVMeConnectorTestCase(test_connector.ConnectorTestCase): @mock.patch.object(nvme.NVMeConnector, '_get_nvme_devices') @mock.patch.object(nvme.NVMeConnector, '_execute') - def test_disconnect_volume_raise(self, mock_devices, mock_execute): + @mock.patch('time.sleep') + def test_disconnect_volume_raise( + self, mock_sleep, mock_execute, mock_devices): mock_execute.side_effect = putils.ProcessExecutionError mock_devices.return_value = '/dev/nvme0n1' connection_properties = {'target_portal': 'portal',