From 0d65451cfc7c96f735143c0d2e593abb80beb731 Mon Sep 17 00:00:00 2001 From: Nikolay Fedotov Date: Fri, 15 Dec 2017 15:05:03 +0300 Subject: [PATCH] Add wait_for_disk to destroy_disk_metadata function * Move _wait_for_disk_to_become_available function from DiskPartitioner class to utils.py * Call the wait_for_disk_to_become_available during the destroy_disk_metadata metadata to ensure that nothing is using disk and preventing metadata from being destroyed. Change-Id: I9134b4cfdf15ba9610b4d1f844e64858d69713a5 Closes-Bug: #1739421 --- ironic_lib/disk_partitioner.py | 60 +++--------------- ironic_lib/disk_utils.py | 8 +++ ironic_lib/tests/test_disk_partitioner.py | 15 +++-- ironic_lib/tests/test_disk_utils.py | 19 +++--- ironic_lib/tests/test_utils.py | 76 ++++++++++++++++++++++ ironic_lib/utils.py | 77 +++++++++++++++++++++++ 6 files changed, 187 insertions(+), 68 deletions(-) diff --git a/ironic_lib/disk_partitioner.py b/ironic_lib/disk_partitioner.py index 7120d745..84a3982a 100644 --- a/ironic_lib/disk_partitioner.py +++ b/ironic_lib/disk_partitioner.py @@ -13,12 +13,8 @@ # License for the specific language governing permissions and limitations # under the License. -import re - -from oslo_concurrency import processutils from oslo_config import cfg from oslo_log import log as logging -from oslo_service import loopingcall from ironic_lib.common.i18n import _ from ironic_lib import exception @@ -67,7 +63,6 @@ class DiskPartitioner(object): self._disk_label = disk_label self._alignment = alignment self._partitions = [] - self._fuser_pids_re = re.compile(r'((\d)+\s*)+') def _exec(self, *args): # NOTE(lucasagomes): utils.execute() is already a wrapper on top @@ -110,30 +105,6 @@ class DiskPartitioner(object): """ return enumerate(self._partitions, 1) - def _wait_for_disk_to_become_available(self, retries, max_retries, pids, - stderr): - retries[0] += 1 - if retries[0] > max_retries: - raise loopingcall.LoopingCallDone() - - try: - # NOTE(ifarkas): fuser returns a non-zero return code if none of - # the specified files is accessed - out, err = utils.execute('fuser', self._device, - check_exit_code=[0, 1], run_as_root=True) - - if not out and not err: - raise loopingcall.LoopingCallDone() - else: - if err: - stderr[0] = err - if out: - pids_match = re.search(self._fuser_pids_re, out) - pids[0] = pids_match.group() - except processutils.ProcessExecutionError as exc: - LOG.warning('Failed to check the device %(device)s with fuser:' - ' %(err)s', {'device': self._device, 'err': exc}) - def commit(self): """Write to the disk.""" LOG.debug("Committing partitions to disk.") @@ -151,30 +122,13 @@ class DiskPartitioner(object): self._exec(*cmd_args) - retries = [0] - pids = [''] - fuser_err = [''] - interval = CONF.disk_partitioner.check_device_interval - max_retries = CONF.disk_partitioner.check_device_max_retries - - timer = loopingcall.FixedIntervalLoopingCall( - self._wait_for_disk_to_become_available, - retries, max_retries, pids, fuser_err) - timer.start(interval=interval).wait() - - if retries[0] > max_retries: - if pids[0]: - raise exception.InstanceDeployFailure( - _('Disk partitioning failed on device %(device)s. ' - 'Processes with the following PIDs are holding it: ' - '%(pids)s. Time out waiting for completion.') - % {'device': self._device, 'pids': pids[0]}) - else: - raise exception.InstanceDeployFailure( - _('Disk partitioning failed on device %(device)s. Fuser ' - 'exited with "%(fuser_err)s". Time out waiting for ' - 'completion.') - % {'device': self._device, 'fuser_err': fuser_err[0]}) + try: + utils.wait_for_disk_to_become_available(self._device) + except exception.IronicException as e: + raise exception.InstanceDeployFailure( + _('Disk partitioning failed on device %(device)s. ' + 'Error: %(error)s') + % {'device': self._device, 'error': e}) def list_opts(): diff --git a/ironic_lib/disk_utils.py b/ironic_lib/disk_utils.py index c8115512..221170a9 100644 --- a/ironic_lib/disk_utils.py +++ b/ironic_lib/disk_utils.py @@ -378,6 +378,14 @@ def destroy_disk_metadata(dev, node_uuid): utils.execute('sgdisk', '-Z', dev, run_as_root=True, use_standard_locale=True) + try: + utils.wait_for_disk_to_become_available(dev) + except exception.IronicException as e: + raise exception.InstanceDeployFailure( + _('Destroying metadata failed on device %(device)s. ' + 'Error: %(error)s') + % {'device': dev, 'error': e}) + LOG.info("Disk metadata on %(dev)s successfully destroyed for node " "%(node)s", {'dev': dev, 'node': node_uuid}) diff --git a/ironic_lib/tests/test_disk_partitioner.py b/ironic_lib/tests/test_disk_partitioner.py index 47315220..ea74e0aa 100644 --- a/ironic_lib/tests/test_disk_partitioner.py +++ b/ironic_lib/tests/test_disk_partitioner.py @@ -81,7 +81,8 @@ class DiskPartitionerTestCase(base.IronicLibTestCase): 'mkpart', 'fake-type', 'fake-fs-type', '3', '4', 'set', '3', 'bios_grub', 'on') mock_utils_exc.assert_called_once_with( - 'fuser', '/dev/fake', run_as_root=True, check_exit_code=[0, 1]) + 'fuser', '/dev/fake', run_as_root=True, + check_exit_code=[0, 1]) @mock.patch.object(eventlet.greenthread, 'sleep', lambda seconds: None) @mock.patch.object(disk_partitioner.DiskPartitioner, '_exec', @@ -98,6 +99,9 @@ class DiskPartitionerTestCase(base.IronicLibTestCase): 'fs_type': 'fake-fs-type', 'type': 'fake-type', 'size': 1})] + # TODO(TheJulia): fuser man page indicates only pids are returned via + # stdout. Meaning tests that put the device on stdout need to be + # corrected. fuser_outputs = iter([("/dev/fake: 10000 10001", None), (None, None)]) with mock.patch.object(dp, 'get_partitions', autospec=True) as mock_gp: @@ -111,7 +115,8 @@ class DiskPartitionerTestCase(base.IronicLibTestCase): 'mkpart', 'fake-type', 'fake-fs-type', '2', '3', 'set', '2', 'boot', 'on') mock_utils_exc.assert_called_with( - 'fuser', '/dev/fake', run_as_root=True, check_exit_code=[0, 1]) + 'fuser', '/dev/fake', run_as_root=True, + check_exit_code=[0, 1]) self.assertEqual(2, mock_utils_exc.call_count) @mock.patch.object(eventlet.greenthread, 'sleep', lambda seconds: None) @@ -141,7 +146,8 @@ class DiskPartitionerTestCase(base.IronicLibTestCase): 'mkpart', 'fake-type', 'fake-fs-type', '2', '3', 'set', '2', 'boot', 'on') mock_utils_exc.assert_called_with( - 'fuser', '/dev/fake', run_as_root=True, check_exit_code=[0, 1]) + 'fuser', '/dev/fake', run_as_root=True, + check_exit_code=[0, 1]) self.assertEqual(20, mock_utils_exc.call_count) # Mock the eventlet.greenthread.sleep for the looping_call @@ -173,5 +179,6 @@ class DiskPartitionerTestCase(base.IronicLibTestCase): 'mkpart', 'fake-type', 'fake-fs-type', '2', '3', 'set', '2', 'boot', 'on') mock_utils_exc.assert_called_with( - 'fuser', '/dev/fake', run_as_root=True, check_exit_code=[0, 1]) + 'fuser', '/dev/fake', run_as_root=True, + check_exit_code=[0, 1]) self.assertEqual(20, mock_utils_exc.call_count) diff --git a/ironic_lib/tests/test_disk_utils.py b/ironic_lib/tests/test_disk_utils.py index 6d6a33b8..2433a087 100644 --- a/ironic_lib/tests/test_disk_utils.py +++ b/ironic_lib/tests/test_disk_utils.py @@ -23,7 +23,6 @@ import mock from oslo_concurrency import processutils from oslo_config import cfg from oslo_serialization import base64 -from oslo_service import loopingcall from oslo_utils import imageutils import requests @@ -465,7 +464,7 @@ class MakePartitionsTestCase(base.IronicLibTestCase): self.assertEqual(expected_result, result) -@mock.patch.object(utils, 'execute', autospec=True) +@mock.patch.object(utils, 'execute', autospec=True, return_value=('', '')) class DestroyMetaDataTestCase(base.IronicLibTestCase): def setUp(self): @@ -479,7 +478,10 @@ class DestroyMetaDataTestCase(base.IronicLibTestCase): use_standard_locale=True), mock.call('sgdisk', '-Z', 'fake-dev', run_as_root=True, - use_standard_locale=True)] + use_standard_locale=True), + mock.call('fuser', self.dev, + check_exit_code=[0, 1], + run_as_root=True)] disk_utils.destroy_disk_metadata(self.dev, self.node_uuid) mock_exec.assert_has_calls(expected_calls) @@ -514,7 +516,8 @@ class DestroyMetaDataTestCase(base.IronicLibTestCase): mock_exec.side_effect = iter( [processutils.ProcessExecutionError(description='--force'), (None, None), - (None, None)]) + (None, None), + ('', '')]) expected_call = [mock.call('wipefs', '--force', '--all', 'fake-dev', run_as_root=True, @@ -562,13 +565,7 @@ class PopulateImageTestCase(base.IronicLibTestCase): self.assertFalse(mock_dd.called) -def _looping_call_done(*args, **kwargs): - raise loopingcall.LoopingCallDone() - - -@mock.patch.object(disk_partitioner.DiskPartitioner, - '_wait_for_disk_to_become_available', - _looping_call_done) +@mock.patch.object(utils, 'wait_for_disk_to_become_available', lambda *_: None) @mock.patch.object(disk_utils, 'is_block_device', lambda d: True) @mock.patch.object(disk_utils, 'block_uuid', lambda p: 'uuid') @mock.patch.object(disk_utils, 'dd', lambda *_: None) diff --git a/ironic_lib/tests/test_utils.py b/ironic_lib/tests/test_utils.py index 16af919b..9848f40b 100644 --- a/ironic_lib/tests/test_utils.py +++ b/ironic_lib/tests/test_utils.py @@ -463,3 +463,79 @@ class MatchRootDeviceTestCase(base.IronicLibTestCase): dev = utils.match_root_device_hints(empty_dev, {'model': 'foo'}) self.assertIsNone(dev) self.assertTrue(mock_warn.called) + + +class WaitForDisk(base.IronicLibTestCase): + + @mock.patch.object(utils, 'execute', autospec=True) + def test_wait_for_disk_to_become_available(self, mock_exc): + mock_exc.return_value = ('', '') + utils.wait_for_disk_to_become_available('fake-dev') + fuser_cmd = ['fuser', 'fake-dev'] + fuser_call = mock.call(*fuser_cmd, run_as_root=True, + check_exit_code=[0, 1]) + self.assertEqual(1, mock_exc.call_count) + mock_exc.assert_has_calls([fuser_call]) + + @mock.patch.object(utils, 'execute', autospec=True, + side_effect=processutils.ProcessExecutionError( + stderr='fake')) + def test_wait_for_disk_to_become_available_no_fuser(self, mock_exc): + CONF.disk_partitioner.check_device_max_retries = 2 + self.assertRaises(exception.IronicException, + utils.wait_for_disk_to_become_available, + 'fake-dev') + fuser_cmd = ['fuser', 'fake-dev'] + fuser_call = mock.call(*fuser_cmd, run_as_root=True, + check_exit_code=[0, 1]) + self.assertEqual(2, mock_exc.call_count) + mock_exc.assert_has_calls([fuser_call, fuser_call]) + + @mock.patch.object(utils, 'execute', autospec=True) + def test_wait_for_disk_to_become_available_device_in_use(self, mock_exc): + # NOTE(TheJulia): Looks like fuser returns the actual list of pids + # in the stdout output, where as all other text is returned in + # stderr. + CONF.disk_partitioner.check_device_interval = .01 + CONF.disk_partitioner.check_device_max_retries = 2 + + mock_exc.side_effect = [(' 1234 ', 'fake-dev: '), + (' 15503 3919 15510 15511', 'fake-dev:')] + expected_error = ('Processes with the following PIDs are ' + 'holding device fake-dev: 15503, 3919, 15510, ' + '15511. Timed out waiting for completion.') + self.assertRaisesRegex( + exception.IronicException, + expected_error, + utils.wait_for_disk_to_become_available, + 'fake-dev') + fuser_cmd = ['fuser', 'fake-dev'] + fuser_call = mock.call(*fuser_cmd, run_as_root=True, + check_exit_code=[0, 1]) + self.assertEqual(2, mock_exc.call_count) + mock_exc.assert_has_calls([fuser_call, fuser_call]) + + @mock.patch.object(utils, 'execute', autospec=True) + def test_wait_for_disk_to_become_available_no_device(self, mock_exc): + # NOTE(TheJulia): Looks like fuser returns the actual list of pids + # in the stdout output, where as all other text is returned in + # stderr. + CONF.disk_partitioner.check_device_interval = .01 + CONF.disk_partitioner.check_device_max_retries = 2 + + mock_exc.return_value = ('', 'Specified filename /dev/fake ' + 'does not exist.') + expected_error = ('Fuser exited with "Specified filename ' + '/dev/fake does not exist." while checking ' + 'locks for device fake-dev. Timed out waiting ' + 'for completion.') + self.assertRaisesRegex( + exception.IronicException, + expected_error, + utils.wait_for_disk_to_become_available, + 'fake-dev') + fuser_cmd = ['fuser', 'fake-dev'] + fuser_call = mock.call(*fuser_cmd, run_as_root=True, + check_exit_code=[0, 1]) + self.assertEqual(2, mock_exc.call_count) + mock_exc.assert_has_calls([fuser_call, fuser_call]) diff --git a/ironic_lib/utils.py b/ironic_lib/utils.py index 19b06e12..68f3febb 100644 --- a/ironic_lib/utils.py +++ b/ironic_lib/utils.py @@ -26,6 +26,7 @@ import re from oslo_concurrency import processutils from oslo_config import cfg +from oslo_service import loopingcall from oslo_utils import excutils from oslo_utils import specs_matcher from oslo_utils import strutils @@ -417,3 +418,79 @@ def match_root_device_hints(devices, root_device_hints): return dev LOG.warning('No device found that matches the root device hints') + + +def wait_for_disk_to_become_available(device): + """Wait for a disk device to become available. + + Waits for a disk device to become available for use by + waiting until all process locks on the device have been + released. + + Timeout and iteration settings come from the configuration + options used by the in-library disk_partitioner: + ``check_device_interval`` and ``check_device_max_retries``. + + :params device: The path to the device. + :raises: IronicException If the disk fails to become + available. + """ + retries = [0] + pids = [''] + stderr = [''] + interval = CONF.disk_partitioner.check_device_interval + max_retries = CONF.disk_partitioner.check_device_max_retries + + def _wait_for_disk(device, retries, max_retries, pids, stderr): + # A regex is likely overkill here, but variations in fuser + # means we should likely use it. + fuser_pids_re = re.compile(r'\d+') + + retries[0] += 1 + if retries[0] > max_retries: + raise loopingcall.LoopingCallDone() + + try: + # NOTE(ifarkas): fuser returns a non-zero return code if none of + # the specified files is accessed. + # NOTE(TheJulia): fuser does not report LVM devices as in use + # unless the LVM device-mapper device is the + # device that is directly polled. + # NOTE(TheJulia): The -m flag allows fuser to reveal data about + # mounted filesystems, which should be considered + # busy/locked. That being said, it is not used + # because busybox fuser has a different behavior. + # NOTE(TheJuia): fuser outputs a list of found PIDs to stdout. + # All other text is returned via stderr, and the + # output to a terminal is merged as a result. + out, err = execute('fuser', device, check_exit_code=[0, 1], + run_as_root=True) + + if err: + stderr[0] = err + if out: + pids[0] = fuser_pids_re.findall(out) + if not out and not err: + raise loopingcall.LoopingCallDone() + except processutils.ProcessExecutionError as exc: + LOG.warning('Failed to check the device %(device)s with fuser:' + ' %(err)s', {'device': device, 'err': exc}) + + timer = loopingcall.FixedIntervalLoopingCall( + _wait_for_disk, + device, retries, max_retries, pids, stderr) + timer.start(interval=interval).wait() + + if retries[0] > max_retries: + if pids[0]: + raise exception.IronicException( + _('Processes with the following PIDs are holding ' + 'device %(device)s: %(pids)s. ' + 'Timed out waiting for completion.') + % {'device': device, 'pids': ', '.join(pids[0])}) + else: + raise exception.IronicException( + _('Fuser exited with "%(fuser_err)s" while checking ' + 'locks for device %(device)s. Timed out waiting for ' + 'completion.') + % {'device': device, 'fuser_err': stderr[0]})