From 8832c5389964973084cd0f80a2d8f7f24255ea3f Mon Sep 17 00:00:00 2001 From: Sophie Huang Date: Tue, 21 Sep 2021 23:05:44 +0000 Subject: [PATCH] multipath/iscsi: iSCSI connections are not reinitiated after reboot After compute host reboot, in an iSCSI/multipath environment, some of the connections to the iSCSI portal are not reinitiated and missing iSCSI devices are observed. This patchset introduced retries for this particular scenario. Closes-Bug: #1944474 Change-Id: I60ee7421f7b792e8324286908a9fdd8fb53e433e --- os_brick/initiator/connectors/iscsi.py | 24 ++++++++++++-- .../tests/initiator/connectors/test_iscsi.py | 32 +++++++++++++++++++ .../notes/bug-1944474-55c5ebb3a37801aa.yaml | 6 ++++ 3 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/bug-1944474-55c5ebb3a37801aa.yaml diff --git a/os_brick/initiator/connectors/iscsi.py b/os_brick/initiator/connectors/iscsi.py index aaff17ac6..b378a38cd 100644 --- a/os_brick/initiator/connectors/iscsi.py +++ b/os_brick/initiator/connectors/iscsi.py @@ -1051,6 +1051,16 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector): return ips, iqns def _connect_to_iscsi_portal(self, connection_properties): + """Safely connect to iSCSI portal-target and return the session id.""" + portal = connection_properties['target_portal'].split(",")[0] + target_iqn = connection_properties['target_iqn'] + + lock_name = f'connect_to_iscsi_portal-{portal}-{target_iqn}' + method = synchronized(lock_name)(self._connect_to_iscsi_portal_unsafe) + return method(connection_properties) + + @utils.retry((exception.BrickException)) + def _connect_to_iscsi_portal_unsafe(self, connection_properties): """Connect to an iSCSI portal-target an return the session id.""" portal = connection_properties['target_portal'].split(",")[0] target_iqn = connection_properties['target_iqn'] @@ -1066,9 +1076,17 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector): out, err = 
self._run_iscsiadm(connection_properties, (), check_exit_code=(0, 21, 255)) if err: - self._run_iscsiadm(connection_properties, - ('--interface', self._get_transport(), - '--op', 'new')) + out_new, err_new = self._run_iscsiadm(connection_properties, + ('--interface', + self._get_transport(), + '--op', 'new'), + check_exit_code=(0, 6)) + if err_new: + # retry if iscsiadm returns 6 for "database failure" + LOG.debug("Retrying to connect to iSCSI portal %s", portal) + msg = (_("Encountered database failure for %s.") % (portal)) + raise exception.BrickException(msg=msg) + # Try to set the scan mode to manual res = self._iscsiadm_update(connection_properties, 'node.session.scan', 'manual', diff --git a/os_brick/tests/initiator/connectors/test_iscsi.py b/os_brick/tests/initiator/connectors/test_iscsi.py index 48558cc38..ca33a2d84 100644 --- a/os_brick/tests/initiator/connectors/test_iscsi.py +++ b/os_brick/tests/initiator/connectors/test_iscsi.py @@ -1113,6 +1113,38 @@ Setting up iSCSI targets: unused self.assertListEqual(expected_cmds, actual_cmds) get_sessions_mock.assert_called_once_with() + @mock.patch.object(iscsi.ISCSIConnector, '_iscsiadm_update') + @mock.patch.object(iscsi.ISCSIConnector, '_get_transport', + return_value='default') + @mock.patch.object(iscsi.ISCSIConnector, '_get_iscsi_sessions_full') + @mock.patch('os_brick.utils._time_sleep') + def test_connect_to_iscsi_portal_fail_op_new(self, sleep_mock, + get_sessions_mock, + get_transport_mock, + iscsiadm_update_mock): + get_sessions_mock.return_value = [] + with mock.patch.object(self.connector, '_execute') as exec_mock: + exec_mock.side_effect = [('', 21), ('', 6), ('', 21), ('', 6), + ('', 21), ('', 6)] + self.assertRaises(exception.BrickException, + self.connector._connect_to_iscsi_portal, + self.CON_PROPS) + expected_cmds = ['iscsiadm -m node -T tgt1 -p ip1:port1', + 'iscsiadm -m node -T tgt1 -p ip1:port1 ' + '--interface default --op new', + 'iscsiadm -m node -T tgt1 -p ip1:port1', + 'iscsiadm -m node 
-T tgt1 -p ip1:port1 ' + '--interface default --op new', + 'iscsiadm -m node -T tgt1 -p ip1:port1', + 'iscsiadm -m node -T tgt1 -p ip1:port1 ' + '--interface default --op new'] + actual_cmds = [' '.join(args[0]) for args in exec_mock.call_args_list] + self.assertListEqual(expected_cmds, actual_cmds) + iscsiadm_update_mock.assert_not_called() + + # Called twice by the retry mechanism + self.assertEqual(2, sleep_mock.call_count) + @mock.patch.object(linuxscsi.LinuxSCSI, 'get_sysfs_wwn', side_effect=(None, 'tgt2')) @mock.patch.object(iscsi.ISCSIConnector, '_connect_vol') diff --git a/releasenotes/notes/bug-1944474-55c5ebb3a37801aa.yaml b/releasenotes/notes/bug-1944474-55c5ebb3a37801aa.yaml new file mode 100644 index 000000000..3aa515ed8 --- /dev/null +++ b/releasenotes/notes/bug-1944474-55c5ebb3a37801aa.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + `Bug #1944474 <https://bugs.launchpad.net/os-brick/+bug/1944474>`_: Fixed + missing retries to reinitiate iSCSI connections with high concurrency of + connections and with multipath enabled.