From 4478433550ddab970f106636a4ba5fbba82c5e39 Mon Sep 17 00:00:00 2001 From: Takashi Kajinami Date: Mon, 15 Feb 2021 10:58:13 +0900 Subject: [PATCH] Avoid unhandled exceptions during connecting to iSCSI portals Currently we don't properly catch some possible exceptions while connecting to iSCSI portals, like failures in "iscsiadm -m session". Because of this, _connect_vol threads can abort unexpectedly in some failure patterns, and this abort causes a hang in subsequent steps waiting for results from _connect_vol threads. This change ensures that any exceptions raised while connecting to iSCSI portals are handled correctly in the _connect_vol method, to avoid an unexpected abort without updating thread results. Closes-Bug: #1915678 Change-Id: I0278c502806b99f8ec65cb146e3852e43031e9b8 --- os_brick/initiator/connectors/iscsi.py | 25 +++++++++++-------- .../tests/initiator/connectors/test_iscsi.py | 12 +++++++++ .../notes/bug-1915678-901a6bd24ecede72.yaml | 7 ++++++ 3 files changed, 34 insertions(+), 10 deletions(-) create mode 100644 releasenotes/notes/bug-1915678-901a6bd24ecede72.yaml diff --git a/os_brick/initiator/connectors/iscsi.py b/os_brick/initiator/connectors/iscsi.py index 7c0d327a9..8060a7b70 100644 --- a/os_brick/initiator/connectors/iscsi.py +++ b/os_brick/initiator/connectors/iscsi.py @@ -626,18 +626,23 @@ class ISCSIConnector(base.BaseLinuxConnector, base_iscsi.BaseISCSIConnector): """ device = hctl = None portal = props['target_portal'] - session, manual_scan = self._connect_to_iscsi_portal(props) - do_scans = rescans > 0 or manual_scan - # Scan is sent on connect by iscsid, but we must do it manually on - # manual scan mode. This scan cannot count towards total rescans. 
- if manual_scan: - num_rescans = -1 - seconds_next_scan = 0 - else: - num_rescans = 0 - seconds_next_scan = 4 + try: + session, manual_scan = self._connect_to_iscsi_portal(props) + except Exception: + LOG.exception('Exception connecting to %s', portal) + session = None if session: + do_scans = rescans > 0 or manual_scan + # Scan is sent on connect by iscsid, but we must do it manually on + # manual scan mode. This scan cannot count towards total rescans. + if manual_scan: + num_rescans = -1 + seconds_next_scan = 0 + else: + num_rescans = 0 + seconds_next_scan = 4 + data['num_logins'] += 1 LOG.debug('Connected to %s', portal) while do_scans: diff --git a/os_brick/tests/initiator/connectors/test_iscsi.py b/os_brick/tests/initiator/connectors/test_iscsi.py index b053277a8..5554ff0e4 100644 --- a/os_brick/tests/initiator/connectors/test_iscsi.py +++ b/os_brick/tests/initiator/connectors/test_iscsi.py @@ -1518,6 +1518,18 @@ Setting up iSCSI targets: unused expected.update(failed_logins=1, stopped_threads=1) self.assertDictEqual(expected, data) + @mock.patch.object(iscsi.ISCSIConnector, '_connect_to_iscsi_portal') + def test_connect_vol_with_connection_failure(self, connect_mock): + data = self._get_connect_vol_data() + + connect_mock.side_effect = Exception() + + self.connector._connect_vol(3, self.CON_PROPS, data) + + expected = self._get_connect_vol_data() + expected.update(failed_logins=1, stopped_threads=1) + self.assertDictEqual(expected, data) + @mock.patch('os_brick.utils._time_sleep', mock.Mock()) @mock.patch.object(linuxscsi.LinuxSCSI, 'scan_iscsi') @mock.patch.object(linuxscsi.LinuxSCSI, 'device_name_by_hctl', diff --git a/releasenotes/notes/bug-1915678-901a6bd24ecede72.yaml b/releasenotes/notes/bug-1915678-901a6bd24ecede72.yaml new file mode 100644 index 000000000..6e6f3d0ee --- /dev/null +++ b/releasenotes/notes/bug-1915678-901a6bd24ecede72.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + `Bug #1915678 `_: Fix + unhandled exception during iscsi volume attachment 
with multipath enabled + that resulted in the cinder-volume service becoming stuck and requiring + a restart.