diff --git a/etc/ironic/ironic.conf.sample b/etc/ironic/ironic.conf.sample index 39b712e37b..33f55c5170 100644 --- a/etc/ironic/ironic.conf.sample +++ b/etc/ironic/ironic.conf.sample @@ -652,6 +652,25 @@ #db_max_retries=20 +[disk_partitioner] + +# +# Options defined in ironic.common.disk_partitioner +# + +# After Ironic has completed creating the partition table, it +# continues to check for activity on the attached iSCSI device +# status at this interval prior to copying the image to the +# node, in seconds (integer value) +#check_device_interval=1 + +# The maximum number of times to check that the device is not +# accessed by another process. If the device is still busy +# after that, the disk partitioning will be treated as having +# failed. (integer value) +#check_device_max_retries=20 + + [glance] # diff --git a/etc/ironic/rootwrap.d/ironic-utils.filters b/etc/ironic/rootwrap.d/ironic-utils.filters index 2080521d38..2695406aa0 100644 --- a/etc/ironic/rootwrap.d/ironic-utils.filters +++ b/etc/ironic/rootwrap.d/ironic-utils.filters @@ -13,4 +13,5 @@ mkswap: CommandFilter, mkswap, root mkfs: CommandFilter, mkfs, root # ironic/common/disk_partitioner.py +fuser: CommandFilter, fuser, root parted: CommandFilter, parted, root diff --git a/ironic/common/disk_partitioner.py b/ironic/common/disk_partitioner.py index 9d559348f3..6b8e695159 100644 --- a/ironic/common/disk_partitioner.py +++ b/ironic/common/disk_partitioner.py @@ -14,11 +14,35 @@ # under the License. import re -import time +from oslo.config import cfg + +from ironic.common import exception from ironic.common import utils from ironic.openstack.common import log as logging +from ironic.openstack.common import loopingcall +from ironic.openstack.common import processutils +opts = [ + cfg.IntOpt('check_device_interval', + default=1, + help='After Ironic has completed creating the partition table, ' + 'it continues to check for activity on the attached iSCSI ' + 'device status at this interval prior to copying the image' + ' to the node, in seconds'), + cfg.IntOpt('check_device_max_retries', + default=20, + help='The maximum number of times to check that the device is ' + 'not accessed by another process. If the device is still ' + 'busy after that, the disk partitioning will be treated as' + ' having failed.'), +] + +CONF = cfg.CONF +opt_group = cfg.OptGroup(name='disk_partitioner', + title='Options for the disk partitioner') +CONF.register_group(opt_group) +CONF.register_opts(opts, opt_group) LOG = logging.getLogger(__name__) @@ -41,6 +65,7 @@ class DiskPartitioner(object): self._disk_label = disk_label self._alignment = alignment self._partitions = [] + self._fuser_pids_re = re.compile(r'((\d)+\s*)+') def _exec(self, *args): # NOTE(lucasagomes): utils.execute() is already a wrapper on top @@ -82,6 +107,30 @@ class DiskPartitioner(object): """ return enumerate(self._partitions, 1) + def _wait_for_disk_to_become_available(self, retries, max_retries, pids, + stderr): + retries[0] += 1 + if retries[0] > max_retries: + raise loopingcall.LoopingCallDone() + + try: + # NOTE(ifarkas): fuser returns a non-zero return code if none of + # the specified files is accessed + out, err = utils.execute('fuser', self._device, + check_exit_code=[0, 1], run_as_root=True) + + if not out and not err: + raise loopingcall.LoopingCallDone() + else: + if err: + stderr[0] = err + if out: + pids_match = re.search(self._fuser_pids_re, out) + pids[0] = pids_match.group() + except processutils.ProcessExecutionError as exc: + LOG.warning(_('Failed to check the device %(device)s with fuser: ' + ' %(err)s') % {'device': self._device, 'err': exc}) + def commit(self): """Write to the disk.""" cmd_args = ['mklabel', self._disk_label] @@ -97,10 +146,31 @@ class DiskPartitioner(object): start = end self._exec(*cmd_args) - # TODO(lucasagomes): Do not sleep, use another mechanism to avoid - # the "device is busy" problem. lsof, fuser... - # avoid "device is busy" - time.sleep(3) + + retries = [0] + pids = [''] + fuser_err = [''] + interval = CONF.disk_partitioner.check_device_interval + max_retries = CONF.disk_partitioner.check_device_max_retries + + timer = loopingcall.FixedIntervalLoopingCall( + self._wait_for_disk_to_become_available, + retries, max_retries, pids, fuser_err) + timer.start(interval=interval).wait() + + if retries[0] > max_retries: + if pids[0]: + raise exception.InstanceDeployFailure( + _('Disk partitioning failed on device %(device)s. ' + 'Processes with the following PIDs are holding it: ' + '%(pids)s. Time out waiting for completion.') + % {'device': self._device, 'pids': pids[0]}) + else: + raise exception.InstanceDeployFailure( + _('Disk partitioning failed on device %(device)s. Fuser ' + 'exited with "%(fuser_err)s". Time out waiting for ' + 'completion.') + % {'device': self._device, 'fuser_err': fuser_err[0]}) _PARTED_PRINT_RE = re.compile(r"^\d+:([\d\.]+)MiB:" diff --git a/ironic/tests/drivers/test_deploy_utils.py b/ironic/tests/drivers/test_deploy_utils.py index 8d31c0d291..062c26e0b1 100644 --- a/ironic/tests/drivers/test_deploy_utils.py +++ b/ironic/tests/drivers/test_deploy_utils.py @@ -14,10 +14,8 @@ # License for the specific language governing permissions and limitations # under the License. -import itertools -import time - import fixtures +import itertools import mock import os import tempfile @@ -490,7 +488,6 @@ class WorkOnDiskTestCase(tests_base.TestCase): commit=True) -@mock.patch.object(time, 'sleep', lambda _: None) @mock.patch.object(common_utils, 'execute') class MakePartitionsTestCase(tests_base.TestCase): @@ -504,13 +501,19 @@ class MakePartitionsTestCase(tests_base.TestCase): '--', 'unit', 'MiB', 'mklabel', 'msdos'] def test_make_partitions(self, mock_exc): - expected_mkpart = ['mkpart', 'primary', 'linux-swap', '1', '513', - 'mkpart', 'primary', '', '513', '1537'] - cmd = self.parted_static_cmd + expected_mkpart + mock_exc.return_value = (None, None) utils.make_partitions(self.dev, self.root_mb, self.swap_mb, self.ephemeral_mb) - mock_exc.assert_called_once_with(*cmd, run_as_root=True, - check_exit_code=[0]) + + expected_mkpart = ['mkpart', 'primary', 'linux-swap', '1', '513', + 'mkpart', 'primary', '', '513', '1537'] + parted_cmd = self.parted_static_cmd + expected_mkpart + parted_call = mock.call(*parted_cmd, run_as_root=True, + check_exit_code=[0]) + fuser_cmd = ['fuser', 'fake-dev'] + fuser_call = mock.call(*fuser_cmd, run_as_root=True, + check_exit_code=[0, 1]) + mock_exc.assert_has_calls([parted_call, fuser_call]) def test_make_partitions_with_ephemeral(self, mock_exc): self.ephemeral_mb = 2048 @@ -518,10 +521,12 @@ class MakePartitionsTestCase(tests_base.TestCase): 'mkpart', 'primary', 'linux-swap', '2049', '2561', 'mkpart', 'primary', '', '2561', '3585'] cmd = self.parted_static_cmd + expected_mkpart + mock_exc.return_value = (None, None) utils.make_partitions(self.dev, self.root_mb, self.swap_mb, self.ephemeral_mb) - mock_exc.assert_called_once_with(*cmd, run_as_root=True, - check_exit_code=[0]) + + parted_call = mock.call(*cmd, run_as_root=True, check_exit_code=[0]) + mock_exc.assert_has_calls(parted_call) @mock.patch.object(utils, 'get_dev_block_size') @@ -592,8 +597,6 @@ class GetDeviceBlockSizeTestCase(tests_base.TestCase): @mock.patch.object(utils, 'block_uuid', lambda p: 'uuid') @mock.patch.object(utils, 'dd', lambda *_: None) @mock.patch.object(common_utils, 'mkfs', lambda *_: None) -# TODO(dtantsur): remove once https://review.openstack.org/90126 lands -@mock.patch.object(time, 'sleep', lambda *_: None) # NOTE(dtantsur): destroy_disk_metadata resets file size, disabling it @mock.patch.object(utils, 'destroy_disk_metadata', lambda *_: None) class RealFilePartitioningTestCase(tests_base.TestCase): @@ -611,8 +614,10 @@ class RealFilePartitioningTestCase(tests_base.TestCase): common_utils.execute('parted', '--version') except OSError as exc: self.skipTest('parted utility was not found: %s' % exc) - self.file = tempfile.NamedTemporaryFile() - self.addCleanup(lambda: self.file.close()) + self.file = tempfile.NamedTemporaryFile(delete=False) + # NOTE(ifarkas): the file needs to be closed, so fuser won't report + # any usage + self.file.close() # NOTE(dtantsur): 20 MiB file with zeros common_utils.execute('dd', 'if=/dev/zero', 'of=%s' % self.file.name, 'bs=1', 'count=0', 'seek=20MiB') diff --git a/ironic/tests/test_disk_partitioner.py b/ironic/tests/test_disk_partitioner.py index ad6107dc23..8cbf273420 100644 --- a/ironic/tests/test_disk_partitioner.py +++ b/ironic/tests/test_disk_partitioner.py @@ -13,20 +13,27 @@ # License for the specific language governing permissions and limitations # under the License. -import time - +import fixtures import mock from testtools.matchers import HasLength from ironic.common import disk_partitioner +from ironic.common import exception from ironic.common import utils from ironic.tests import base -@mock.patch.object(time, 'sleep', lambda _: None) -@mock.patch.object(utils, 'execute', lambda _: None) class DiskPartitionerTestCase(base.TestCase): + def setUp(self): + super(DiskPartitionerTestCase, self).setUp() + + def noop(*args, **kwargs): + pass + + self.useFixture(fixtures.MonkeyPatch('eventlet.greenthread.sleep', + noop)) + def test_add_partition(self): dp = disk_partitioner.DiskPartitioner('/dev/fake') dp.add_partition(1024) @@ -49,7 +56,8 @@ class DiskPartitionerTestCase(base.TestCase): self.assertEqual(expected, partitions) @mock.patch.object(disk_partitioner.DiskPartitioner, '_exec') - def test_commit(self, mock_exec): + @mock.patch.object(utils, 'execute') + def test_commit(self, mock_utils_exc, mock_disk_partitioner_exec): dp = disk_partitioner.DiskPartitioner('/dev/fake') fake_parts = [(1, {'bootable': False, 'fs_type': 'fake-fs-type', @@ -61,11 +69,98 @@ class DiskPartitionerTestCase(base.TestCase): 'size': 1})] with mock.patch.object(dp, 'get_partitions') as mock_gp: mock_gp.return_value = fake_parts + mock_utils_exc.return_value = (None, None) dp.commit() - mock_exec.assert_called_once_with('mklabel', 'msdos', - 'mkpart', 'fake-type', 'fake-fs-type', '1', '2', - 'mkpart', 'fake-type', 'fake-fs-type', '2', '3', - 'set', '2', 'boot', 'on') + + mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos', + 'mkpart', 'fake-type', 'fake-fs-type', '1', '2', + 'mkpart', 'fake-type', 'fake-fs-type', '2', '3', + 'set', '2', 'boot', 'on') + mock_utils_exc.assert_called_once_with('fuser', '/dev/fake', + run_as_root=True, check_exit_code=[0, 1]) + + @mock.patch.object(disk_partitioner.DiskPartitioner, '_exec') + @mock.patch.object(utils, 'execute') + def test_commit_with_device_is_busy_once(self, mock_utils_exc, + mock_disk_partitioner_exec): + dp = disk_partitioner.DiskPartitioner('/dev/fake') + fake_parts = [(1, {'bootable': False, + 'fs_type': 'fake-fs-type', + 'type': 'fake-type', + 'size': 1}), + (2, {'bootable': True, + 'fs_type': 'fake-fs-type', + 'type': 'fake-type', + 'size': 1})] + fuser_outputs = [("/dev/fake: 10000 10001", None), (None, None)] + + with mock.patch.object(dp, 'get_partitions') as mock_gp: + mock_gp.return_value = fake_parts + mock_utils_exc.side_effect = fuser_outputs + dp.commit() + + mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos', + 'mkpart', 'fake-type', 'fake-fs-type', '1', '2', + 'mkpart', 'fake-type', 'fake-fs-type', '2', '3', + 'set', '2', 'boot', 'on') + mock_utils_exc.assert_called_with('fuser', '/dev/fake', + run_as_root=True, check_exit_code=[0, 1]) + self.assertEqual(2, mock_utils_exc.call_count) + + @mock.patch.object(disk_partitioner.DiskPartitioner, '_exec') + @mock.patch.object(utils, 'execute') + def test_commit_with_device_is_always_busy(self, mock_utils_exc, + mock_disk_partitioner_exec): + dp = disk_partitioner.DiskPartitioner('/dev/fake') + fake_parts = [(1, {'bootable': False, + 'fs_type': 'fake-fs-type', + 'type': 'fake-type', + 'size': 1}), + (2, {'bootable': True, + 'fs_type': 'fake-fs-type', + 'type': 'fake-type', + 'size': 1})] + + with mock.patch.object(dp, 'get_partitions') as mock_gp: + mock_gp.return_value = fake_parts + mock_utils_exc.return_value = ("/dev/fake: 10000 10001", None) + self.assertRaises(exception.InstanceDeployFailure, dp.commit) + + mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos', + 'mkpart', 'fake-type', 'fake-fs-type', '1', '2', + 'mkpart', 'fake-type', 'fake-fs-type', '2', '3', + 'set', '2', 'boot', 'on') + mock_utils_exc.assert_called_with('fuser', '/dev/fake', + run_as_root=True, check_exit_code=[0, 1]) + self.assertEqual(20, mock_utils_exc.call_count) + + @mock.patch.object(disk_partitioner.DiskPartitioner, '_exec') + @mock.patch.object(utils, 'execute') + def test_commit_with_device_disconnected(self, mock_utils_exc, + mock_disk_partitioner_exec): + dp = disk_partitioner.DiskPartitioner('/dev/fake') + fake_parts = [(1, {'bootable': False, + 'fs_type': 'fake-fs-type', + 'type': 'fake-type', + 'size': 1}), + (2, {'bootable': True, + 'fs_type': 'fake-fs-type', + 'type': 'fake-type', + 'size': 1})] + + with mock.patch.object(dp, 'get_partitions') as mock_gp: + mock_gp.return_value = fake_parts + mock_utils_exc.return_value = (None, "Specified filename /dev/fake" + " does not exist.") + self.assertRaises(exception.InstanceDeployFailure, dp.commit) + + mock_disk_partitioner_exec.assert_called_once_with('mklabel', 'msdos', + 'mkpart', 'fake-type', 'fake-fs-type', '1', '2', + 'mkpart', 'fake-type', 'fake-fs-type', '2', '3', + 'set', '2', 'boot', 'on') + mock_utils_exc.assert_called_with('fuser', '/dev/fake', + run_as_root=True, check_exit_code=[0, 1]) + self.assertEqual(20, mock_utils_exc.call_count) @mock.patch.object(utils, 'execute')