image_convert: retry resource unavailable and make RLIMIT configurable

This change fixes two potential issues with qemu-img on a busy machine:
1) Retry infrequent "Resource temporarily unavailable"
2) Make address space limits configurable instead of hardcoding to 1Gi

Story: #2007644
Task: #39702
Change-Id: I82a94e68e1aa1f634c7f0877ccd674574d1c9bb0
(cherry picked from commit 555c4948dc)
This commit is contained in:
Dmitry Tantsur 2020-05-08 15:08:55 +02:00
parent b37a06e23d
commit 5ffc447bce
2 changed files with 89 additions and 5 deletions

View File

@ -68,6 +68,13 @@ opts = [
default=10,
help='Maximum number of attempts to try to read the '
'partition.'),
cfg.IntOpt('image_convert_memory_limit',
default=1024,
help='Memory limit for "qemu-img convert" in MiB. Implemented '
'via the address space resource limit.'),
cfg.IntOpt('image_convert_attempts',
default=3,
help='Number of attempts to convert an image.'),
]
CONF = cfg.CONF
@ -85,7 +92,16 @@ MAX_CONFIG_DRIVE_SIZE_MB = 64
MAX_DISK_SIZE_MB_SUPPORTED_BY_MBR = 2097152
# Limit the memory address space to 1 GiB when running qemu-img
QEMU_IMG_LIMITS = processutils.ProcessLimits(address_space=1 * units.Gi)
QEMU_IMG_LIMITS = None
def _qemu_img_limits():
global QEMU_IMG_LIMITS
if QEMU_IMG_LIMITS is None:
QEMU_IMG_LIMITS = processutils.ProcessLimits(
address_space=CONF.disk_utils.image_convert_memory_limit
* units.Mi)
return QEMU_IMG_LIMITS
def list_partitions(device):
@ -382,14 +398,29 @@ def qemu_img_info(path):
out, err = utils.execute('env', 'LC_ALL=C', 'LANG=C',
'qemu-img', 'info', path,
prlimit=QEMU_IMG_LIMITS)
prlimit=_qemu_img_limits())
return imageutils.QemuImgInfo(out)
def convert_image(source, dest, out_format, run_as_root=False):
"""Convert image to other format."""
cmd = ('qemu-img', 'convert', '-O', out_format, source, dest)
utils.execute(*cmd, run_as_root=run_as_root, prlimit=QEMU_IMG_LIMITS)
# TODO(dtantsur): use the retrying library
for attempt in range(CONF.disk_utils.image_convert_attempts):
cmd = ('qemu-img', 'convert', '-O', out_format, source, dest)
try:
utils.execute(*cmd, run_as_root=run_as_root,
prlimit=_qemu_img_limits(),
use_standard_locale=True)
except processutils.ProcessExecutionError as e:
if ('Resource temporarily unavailable' in e.stderr
and attempt < CONF.disk_utils.image_convert_attempts - 1):
LOG.debug('Failed to convert image, retrying. Error: %s', e)
# Sync disk caches before the next attempt
utils.execute('sync')
else:
raise
else:
return
def populate_image(src, dst, conv_flags=None):

View File

@ -1045,7 +1045,60 @@ class OtherFunctionTestCase(base.IronicLibTestCase):
execute_mock.assert_called_once_with('qemu-img', 'convert', '-O',
'out_format', 'source', 'dest',
run_as_root=False,
prlimit=mock.ANY)
prlimit=mock.ANY,
use_standard_locale=True)
@mock.patch.object(utils, 'execute', autospec=True)
def test_convert_image_retries(self, execute_mock):
ret_err = 'qemu: qemu_thread_create: Resource temporarily unavailable'
execute_mock.side_effect = [
processutils.ProcessExecutionError(stderr=ret_err),
('', ''),
processutils.ProcessExecutionError(stderr=ret_err),
('', ''),
('', ''),
]
disk_utils.convert_image('source', 'dest', 'out_format')
convert_call = mock.call('qemu-img', 'convert', '-O',
'out_format', 'source', 'dest',
run_as_root=False,
prlimit=mock.ANY,
use_standard_locale=True)
execute_mock.assert_has_calls([
convert_call,
mock.call('sync'),
convert_call,
mock.call('sync'),
convert_call,
])
@mock.patch.object(utils, 'execute', autospec=True)
def test_convert_image_fails(self, execute_mock):
ret_err = 'qemu: qemu_thread_create: Resource temporarily unavailable'
execute_mock.side_effect = [
processutils.ProcessExecutionError(stderr=ret_err),
('', ''),
processutils.ProcessExecutionError(stderr=ret_err),
('', ''),
processutils.ProcessExecutionError(stderr=ret_err),
]
self.assertRaises(processutils.ProcessExecutionError,
disk_utils.convert_image,
'source', 'dest', 'out_format')
convert_call = mock.call('qemu-img', 'convert', '-O',
'out_format', 'source', 'dest',
run_as_root=False,
prlimit=mock.ANY,
use_standard_locale=True)
execute_mock.assert_has_calls([
convert_call,
mock.call('sync'),
convert_call,
mock.call('sync'),
convert_call,
])
@mock.patch.object(os.path, 'getsize', autospec=True)
@mock.patch.object(disk_utils, 'qemu_img_info', autospec=True)