WIP: Ensure systemd services are activated

When a deployment times out while the paunch command is running, containers
might be left in an incomplete state. In particular, when podman is used
and containers are managed by systemd, paunch sometimes succeeds in
creating a container but is aborted before creating and enabling
the systemd service for that container.

This change makes paunch check the status of systemd services to ensure
that any disabled/stopped/missing services are properly recovered on the
next run.

Closes-Bug: #1955056
Change-Id: I1713d55318834cd93a0a045970aa4800aed4cd68
This commit is contained in:
Takashi Kajinami 2021-12-17 00:17:54 +09:00
parent d89f5697b9
commit f0358db798
6 changed files with 154 additions and 7 deletions

View File

@ -102,6 +102,43 @@ class BaseBuilder(object):
if container in desired_names:
self.log.debug('Skipping existing container: %s' %
container)
if systemd_managed:
try:
if not systemd.service_enabled(
container=container_name, log=self.log):
self.log.warning(
'Service for container %s is not '
'activated. Reactivating' % container_name)
systemd.service_create(
container=container_name,
cconfig=cconfig,
log=self.log)
if (not self.healthcheck_disabled and
'healthcheck' in cconfig and
not systemd.healthcheck_enabled(
container=container_name,
log=self.log)):
self.log.warning(
'Healthcheck service for container %s is '
'not activated. Reactivating'
% container_name)
check = cconfig.get('healthcheck')['test']
systemd.healthcheck_create(
container=container_name,
log=self.log,
test=check)
systemd.healthcheck_timer_create(
container=container_name,
cconfig=cconfig,
log=self.log)
except systemctl.SystemctlMaskedException:
self.log.warning(
'Masked service for container %s '
'is not managed here' % container_name)
pass
continue
c_name = self.runner.discover_container_name(

View File

@ -500,11 +500,10 @@ class PodmanRunner(BaseRunner):
container)
return False
service_name = 'tripleo_' + container + '.service'
try:
systemctl.is_active(service_name)
if systemctl.is_active(service_name):
self.log.debug('Unit %s is running' % service_name)
return True
except systemctl.SystemctlException:
else:
chk_cmd = [
self.cont_cmd,
'ps',

View File

@ -54,12 +54,41 @@ class TestUtilsSystemctl(base.TestCase):
@mock.patch('subprocess.run', autospec=True)
def test_is_active(self, mock_subprocess_run):
    # self.r is the canned subprocess result prepared by the test fixture
    # (presumably a successful one; its setup is outside this view).
    mock_subprocess_run.return_value = self.r
    # Call is_active() exactly once and check its boolean result; the
    # former bare, un-asserted call only duplicated the subprocess run.
    self.assertTrue(systemctl.is_active('foo'))
    mock_subprocess_run.assert_has_calls([
        mock.call(['systemctl', 'is-active', '-q', 'foo'],
                  stderr=-1, stdout=-1, universal_newlines=True)
    ])
@mock.patch('subprocess.run', autospec=True)
def test_is_active_inactive(self, mock_subprocess_run):
    # A return code of 1 from the patched subprocess must be reported
    # as False by is_active().
    failed_result = self.r
    failed_result.returncode = 1
    mock_subprocess_run.return_value = failed_result
    expected_call = mock.call(
        ['systemctl', 'is-active', '-q', 'foo'],
        stderr=-1, stdout=-1, universal_newlines=True)

    active = systemctl.is_active('foo')

    self.assertFalse(active)
    mock_subprocess_run.assert_has_calls([expected_call])
@mock.patch('subprocess.run', autospec=True)
def test_is_enabled(self, mock_subprocess_run):
    # With the fixture's default result, is_enabled() must report True
    # and must have queried `systemctl is-enabled -q foo`.
    mock_subprocess_run.return_value = self.r
    expected_call = mock.call(
        ['systemctl', 'is-enabled', '-q', 'foo'],
        stderr=-1, stdout=-1, universal_newlines=True)

    enabled = systemctl.is_enabled('foo')

    self.assertTrue(enabled)
    mock_subprocess_run.assert_has_calls([expected_call])
@mock.patch('subprocess.run', autospec=True)
def test_is_enabled_disabled(self, mock_subprocess_run):
    # A return code of 1 from the patched subprocess must be reported
    # as False by is_enabled().
    failed_result = self.r
    failed_result.returncode = 1
    mock_subprocess_run.return_value = failed_result
    expected_call = mock.call(
        ['systemctl', 'is-enabled', '-q', 'foo'],
        stderr=-1, stdout=-1, universal_newlines=True)

    enabled = systemctl.is_enabled('foo')

    self.assertFalse(enabled)
    mock_subprocess_run.assert_has_calls([expected_call])
@mock.patch('subprocess.run', autospec=True)
def test_enable(self, mock_subprocess_run):
mock_subprocess_run.return_value = self.r

View File

@ -131,6 +131,48 @@ class TestUtilsSystemd(base.TestCase):
mock.call(os.path.join(tempdir, service_requires_d)),
])
@mock.patch('os.path.isfile', autospec=True)
@mock.patch('subprocess.run', autospec=True)
def test_service_is_active(self, mock_subprocess_run, mock_isfile):
    # Unit file exists and both systemctl queries use the fixture's
    # default result: the service must be reported as active, after an
    # is-enabled query followed by an is-active query.
    container = 'my_app'
    unit = 'tripleo_%s.service' % container
    mock_isfile.return_value = True
    mock_subprocess_run.return_value = self.r
    expected_calls = [
        mock.call(['systemctl', verb, '-q', unit],
                  stderr=-1, stdout=-1, universal_newlines=True)
        for verb in ('is-enabled', 'is-active')
    ]

    result = systemd.service_is_active(container)

    self.assertTrue(result)
    mock_subprocess_run.assert_has_calls(expected_calls)
@mock.patch('os.path.isfile', autospec=True)
@mock.patch('subprocess.run', autospec=True)
def test_service_is_active_file_not_exist(self, mock_subprocess_run,
                                          mock_isfile):
    # Without a unit file on disk the service is inactive, and systemctl
    # must not be consulted at all.
    mock_isfile.return_value = False
    mock_subprocess_run.return_value = self.r

    result = systemd.service_is_active('my_app')

    self.assertFalse(result)
    self.assertEqual(0, mock_subprocess_run.call_count)
@mock.patch('os.path.isfile', autospec=True)
@mock.patch('subprocess.run', autospec=True)
def test_service_is_active_stopped(self, mock_subprocess_run, mock_isfile):
    # Unit file exists but systemctl exits with 1: the service must be
    # reported as inactive. Only the is-enabled query is asserted here.
    container = 'my_app'
    unit = 'tripleo_%s.service' % container
    mock_isfile.return_value = True
    failed_result = self.r
    failed_result.returncode = 1
    mock_subprocess_run.return_value = failed_result
    expected_call = mock.call(
        ['systemctl', 'is-enabled', '-q', unit],
        stderr=-1, stdout=-1, universal_newlines=True)

    result = systemd.service_is_active(container)

    self.assertFalse(result)
    mock_subprocess_run.assert_has_calls([expected_call])
@mock.patch('os.chmod')
def test_healthcheck_create(self, mock_chmod):
container = 'my_app'

View File

@ -58,7 +58,19 @@ def reset_failed(service, log=None):
def is_active(service, log=None):
    """Report whether a systemd unit is currently active.

    Runs ``systemctl is-active -q <service>`` through the module's
    ``systemctl`` helper and turns its outcome into a boolean.

    :param service: name of the systemd unit to query
    :param log: optional logger forwarded to the ``systemctl`` helper
    :returns: True if the command succeeded, False if the helper raised
              SystemctlException
    """
    # The query is issued exactly once and only inside the try block, so a
    # failing command is reported as False instead of propagating.
    try:
        systemctl(['is-active', '-q', service], log)
        return True
    except SystemctlException:
        return False
def is_enabled(service, log=None):
    """Report whether a systemd unit is enabled.

    Runs ``systemctl is-enabled -q <service>`` through the module's
    ``systemctl`` helper.

    :param service: name of the systemd unit to query
    :param log: optional logger forwarded to the ``systemctl`` helper
    :returns: True if the command succeeded, False if the helper raised
              SystemctlException
    """
    try:
        systemctl(['is-enabled', '-q', service], log)
    except SystemctlException:
        return False
    else:
        return True
def is_masked(service, log=None):

View File

@ -159,7 +159,20 @@ def service_delete(container, sysdir=constants.SYSTEMD_DIR, log=None):
shutil.rmtree(os.path.join(sysdir, sysd_health_req_d))
def healthcheck_create(container, sysdir='/etc/systemd/system/',
def service_is_active(container, sysdir=constants.SYSTEMD_DIR, log=None):
    """Check that the systemd service of a container is fully in place.

    The service counts as active only when its unit file exists under
    ``sysdir`` and systemctl reports the unit as both enabled and active.

    :param container: container name (without the ``tripleo_`` prefix)
    :param sysdir: directory holding the systemd unit files
    :param log: optional logger; a module logger is configured when omitted
    :returns: True if the unit file exists and the unit is enabled and
              active, False otherwise
    """
    log = log or common.configure_logging(__name__)
    # prefix is explained in the service_create().
    service = 'tripleo_' + container

    sysd_unit_f = systemctl.format_name(service)
    # os.path.join keeps the lookup correct whether or not sysdir ends with
    # a path separator.
    if not os.path.isfile(os.path.join(sysdir, sysd_unit_f)):
        return False

    return (systemctl.is_enabled(sysd_unit_f, log=log) and
            systemctl.is_active(sysd_unit_f, log=log))
def healthcheck_create(container, sysdir=constants.SYSTEMD_DIR,
log=None, test='/openstack/healthcheck'):
"""Create a healthcheck for a service in systemd
@ -203,7 +216,7 @@ WantedBy=multi-user.target
""" % s_config)
def healthcheck_timer_create(container, cconfig, sysdir='/etc/systemd/system/',
def healthcheck_timer_create(container, cconfig, sysdir=constants.SYSTEMD_DIR,
log=None):
"""Create a systemd timer for a healthcheck
@ -252,3 +265,18 @@ WantedBy=timers.target""" % s_config)
except systemctl.SystemctlException:
log.exception("systemctl failed")
raise
def healthcheck_is_active(container, sysdir=constants.SYSTEMD_DIR,
                          log=None):
    """Check that the healthcheck timer of a container is fully in place.

    The timer counts as active only when its unit file exists under
    ``sysdir`` and systemctl reports the timer as both enabled and active.

    :param container: container name (without the ``tripleo_`` prefix)
    :param sysdir: directory holding the systemd unit files
    :param log: optional logger; a module logger is configured when omitted
    :returns: True if the timer file exists and the timer is enabled and
              active, False otherwise
    """
    log = log or common.configure_logging(__name__)

    service = 'tripleo_' + container
    healthcheck_timer = service + '_healthcheck.timer'
    # Keep the unit name and the on-disk path separate: the path is built
    # once for the existence check, while systemctl is queried by unit name.
    sysd_timer_path = os.path.join(sysdir, healthcheck_timer)
    if not os.path.isfile(sysd_timer_path):
        return False

    return (systemctl.is_enabled(healthcheck_timer, log=log) and
            systemctl.is_active(healthcheck_timer, log=log))