Ensure systemd services are activated

When a deployment times out while the paunch command is running, containers
might be left in an incomplete state. In particular, when podman is used
and containers are managed by systemd, paunch sometimes succeeds in
creating a container but is aborted before it creates and enables
the systemd service for that container.

This change makes paunch check the status of systemd services to detect
any disabled/stopped/missing services, and remove containers with
incomplete systemd services so that the containers and the associated
systemd services are recreated and reconfigured.

This reimplements commit 6ddd429488 which
had several issues.

Conflicts:
	paunch/tests/test_utils_systemctl.py

Resolved conflict caused by b995b0ade8 .

Closes-Bug: #1955056
Change-Id: Idc04b6a4276f0ac9e5a07af03c317e9f665eb6ce
(cherry picked from commit 483c0a3733)
This commit is contained in:
Takashi Kajinami 2022-01-19 13:16:40 +09:00
parent dd8bcbc54c
commit be899c9d95
6 changed files with 197 additions and 17 deletions

View File

@ -68,24 +68,40 @@ class BaseBuilder(object):
desired_names = set([cn[-1] for cn in container_names])
for container in sorted(self.config, key=key_fltr):
# Before creating the container, figure out if it needs to be
# removed because of its configuration has changed.
# If anything has been deleted, refresh the container_names/desired
if self.delete_updated(container, container_names):
container_names = self.runner.container_names(self.config_id)
desired_names = set([cn[-1] for cn in container_names])
self.log.debug("Running container: %s" % container)
cconfig = self.config[container]
action = cconfig.get('action', 'run')
restart = cconfig.get('restart', 'none')
exit_codes = cconfig.get('exit_codes', [0])
container_name = self.runner.unique_container_name(container)
systemd_managed = (restart != 'none'
and self.runner.cont_cmd == 'podman'
and action == 'run')
start_cmd = 'create' if systemd_managed else 'run'
force_delete = False
if systemd_managed:
if not systemd.service_is_active(container, log=self.log):
self.log.debug("Service is not activated. "
"Recreating the container.")
force_delete = True
if ((not self.healthcheck_disabled) and
'healthcheck' in cconfig and
not systemd.healthcheck_is_active(
container=container, log=self.log)):
self.log.debug("Healthcheck is not activated. "
"Recreating the container.")
force_delete = True
# Before creating the container, figure out if it needs to be
# removed because of its configuration has changed.
# This also removes containers with incomplete systemd services.
# If anything has been deleted, refresh the container_names/desired
if self.delete_updated(container, container_names, force_delete):
container_names = self.runner.container_names(self.config_id)
desired_names = set([cn[-1] for cn in container_names])
self.log.debug("Running container: %s" % container)
container_name = self.runner.unique_container_name(container)
# When upgrading from Docker to Podman, we want to stop the
# container that runs under Docker first before starting it with
# Podman. The container will be removed later in THT during
@ -203,12 +219,18 @@ class BaseBuilder(object):
"%s" % container)
return deleted
def delete_updated(self, container, container_names):
def delete_updated(self, container, container_names, force=False):
# If a container is not deployed, there is nothing to delete
if (container not in
list(itertools.chain.from_iterable(container_names))):
return False
# delete the container when it is forced
if force:
self.log.debug("Deleting container (force): %s" % container)
self.runner.remove_container(container)
return True
# fetch container inspect info
inspect_info = self.runner.inspect(container)
if not inspect_info:

View File

@ -500,11 +500,10 @@ class PodmanRunner(BaseRunner):
container)
return False
service_name = 'tripleo_' + container + '.service'
try:
systemctl.is_active(service_name)
if systemctl.is_active(service_name):
self.log.debug('Unit %s is running' % service_name)
return True
except systemctl.SystemctlException:
else:
chk_cmd = [
self.cont_cmd,
'ps',

View File

@ -14,6 +14,7 @@
# under the License.
import mock
import subprocess
from paunch.tests import base
from paunch.utils import systemctl
@ -43,9 +44,34 @@ class TestUtilsSystemctl(base.TestCase):
@mock.patch('subprocess.check_call', autospec=True)
def test_is_active(self, mock_subprocess_check_call):
systemctl.is_active('foo')
self.assertTrue(systemctl.is_active('foo'))
mock_subprocess_check_call.assert_has_calls([
mock.call(['systemctl', 'is-active', '-q', 'foo']),
mock.call(['systemctl', 'is-active', '-q', 'foo'])
])
@mock.patch('subprocess.check_call', autospec=True)
def test_is_active_inactive(self, mock_subprocess_check_call):
    """is_active() returns False when the systemctl command fails."""
    failure = subprocess.CalledProcessError(1, 'error')
    mock_subprocess_check_call.side_effect = failure

    result = systemctl.is_active('foo')

    self.assertFalse(result)
    expected_calls = [mock.call(['systemctl', 'is-active', '-q', 'foo'])]
    mock_subprocess_check_call.assert_has_calls(expected_calls)
@mock.patch('subprocess.check_call', autospec=True)
def test_is_enabled(self, mock_subprocess_check_call):
    """is_enabled() returns True when the systemctl command succeeds."""
    result = systemctl.is_enabled('foo')

    self.assertTrue(result)
    expected_calls = [mock.call(['systemctl', 'is-enabled', '-q', 'foo'])]
    mock_subprocess_check_call.assert_has_calls(expected_calls)
@mock.patch('subprocess.check_call', autospec=True)
def test_is_enabled_disabled(self, mock_subprocess_check_call):
    """is_enabled() returns False when the systemctl command fails."""
    failure = subprocess.CalledProcessError(1, 'error')
    mock_subprocess_check_call.side_effect = failure

    result = systemctl.is_enabled('foo')

    self.assertFalse(result)
    expected_calls = [mock.call(['systemctl', 'is-enabled', '-q', 'foo'])]
    mock_subprocess_check_call.assert_has_calls(expected_calls)
@mock.patch('subprocess.check_call', autospec=True)

View File

@ -15,6 +15,7 @@
import mock
import os
import subprocess
import tempfile
from paunch.tests import base
@ -111,6 +112,51 @@ class TestUtilsSystemd(base.TestCase):
mock.call(os.path.join(tempdir, service_requires_d)),
])
@mock.patch('os.path.isfile', autospec=True)
@mock.patch('subprocess.check_call', autospec=True)
def test_service_is_active(self, mock_subprocess_check_call, mock_isfile):
    """service_is_active() is True when file and systemctl checks pass."""
    mock_isfile.return_value = True
    container = 'my_app'
    unit = 'tripleo_' + container + '.service'

    self.assertTrue(systemd.service_is_active(container))

    mock_isfile.assert_called_once_with('/etc/systemd/system/' + unit)
    # The enabled check is expected before the active check.
    expected_calls = [
        mock.call(['systemctl', sub_cmd, '-q', unit])
        for sub_cmd in ('is-enabled', 'is-active')
    ]
    mock_subprocess_check_call.assert_has_calls(expected_calls)
@mock.patch('os.path.isfile', autospec=True)
@mock.patch('subprocess.check_call', autospec=True)
def test_service_is_active_file_not_exist(self, mock_subprocess_check_call,
                                          mock_isfile):
    """service_is_active() is False when the unit file is missing."""
    mock_isfile.return_value = False
    container = 'my_app'
    unit = 'tripleo_' + container + '.service'

    self.assertFalse(systemd.service_is_active(container))

    mock_isfile.assert_called_once_with('/etc/systemd/system/' + unit)
    # systemctl must not be queried when the unit file is missing.
    self.assertEqual(0, mock_subprocess_check_call.call_count)
@mock.patch('os.path.isfile', autospec=True)
@mock.patch('subprocess.check_call', autospec=True)
def test_service_is_active_stopped(self, mock_subprocess_check_call,
                                   mock_isfile):
    """service_is_active() is False when the systemctl command fails."""
    mock_isfile.return_value = True
    failure = subprocess.CalledProcessError(1, 'error')
    mock_subprocess_check_call.side_effect = failure
    container = 'my_app'
    unit = 'tripleo_' + container + '.service'

    self.assertFalse(systemd.service_is_active(container))

    mock_isfile.assert_called_once_with('/etc/systemd/system/' + unit)
    expected_calls = [mock.call(['systemctl', 'is-enabled', '-q', unit])]
    mock_subprocess_check_call.assert_has_calls(expected_calls)
@mock.patch('os.chmod')
def test_healthcheck_create(self, mock_chmod):
container = 'my_app'
@ -165,3 +211,50 @@ class TestUtilsSystemd(base.TestCase):
healthcheck_timer]),
mock.call(['systemctl', 'daemon-reload']),
])
@mock.patch('os.path.isfile', autospec=True)
@mock.patch('subprocess.check_call', autospec=True)
def test_healthcheck_is_active(self, mock_subprocess_check_call,
                               mock_isfile):
    """healthcheck_is_active() is True when file and systemctl checks pass."""
    mock_isfile.return_value = True
    container = 'my_app'
    timer = 'tripleo_' + container + '_healthcheck.timer'

    self.assertTrue(systemd.healthcheck_is_active(container))

    mock_isfile.assert_called_once_with('/etc/systemd/system/' + timer)
    # The enabled check is expected before the active check.
    expected_calls = [
        mock.call(['systemctl', sub_cmd, '-q', timer])
        for sub_cmd in ('is-enabled', 'is-active')
    ]
    mock_subprocess_check_call.assert_has_calls(expected_calls)
@mock.patch('os.path.isfile', autospec=True)
@mock.patch('subprocess.check_call', autospec=True)
def test_healthcheck_is_active_file_not_exist(self,
                                              mock_subprocess_check_call,
                                              mock_isfile):
    """healthcheck_is_active() is False when the timer file is missing."""
    mock_isfile.return_value = False
    container = 'my_app'
    timer = 'tripleo_' + container + '_healthcheck.timer'

    self.assertFalse(systemd.healthcheck_is_active(container))

    mock_isfile.assert_called_once_with('/etc/systemd/system/' + timer)
    # systemctl must not be queried when the timer file is missing.
    self.assertEqual(0, mock_subprocess_check_call.call_count)
@mock.patch('os.path.isfile', autospec=True)
@mock.patch('subprocess.check_call', autospec=True)
def test_healthcheck_is_active_stopped(self, mock_subprocess_check_call,
                                       mock_isfile):
    """healthcheck_is_active() is False when the systemctl command fails."""
    mock_isfile.return_value = True
    failure = subprocess.CalledProcessError(1, 'error')
    mock_subprocess_check_call.side_effect = failure
    container = 'my_app'
    timer = 'tripleo_' + container + '_healthcheck.timer'

    self.assertFalse(systemd.healthcheck_is_active(container))

    mock_isfile.assert_called_once_with('/etc/systemd/system/' + timer)
    expected_calls = [mock.call(['systemctl', 'is-enabled', '-q', timer])]
    mock_subprocess_check_call.assert_has_calls(expected_calls)

View File

@ -51,7 +51,19 @@ def reset_failed(service, log=None):
def is_active(service, log=None):
systemctl(['is-active', '-q', service], log)
try:
systemctl(['is-active', '-q', service], log)
return True
except SystemctlException:
return False
def is_enabled(service, log=None):
    """Tell whether ``systemctl is-enabled`` succeeds for a unit.

    :param service: unit name handed to ``systemctl is-enabled -q``
    :param log: optional logger forwarded to the systemctl() helper
    :returns: True when the command succeeds, False when it raises
              SystemctlException
    """
    enabled = True
    try:
        systemctl(['is-enabled', '-q', service], log)
    except SystemctlException:
        enabled = False
    return enabled
# NOTE(bogdando): this implements a crash-loop with reset-failed

View File

@ -163,7 +163,20 @@ def service_delete(container, sysdir=constants.SYSTEMD_DIR, log=None):
shutil.rmtree(os.path.join(sysdir, sysd_health_req_d))
def healthcheck_create(container, sysdir='/etc/systemd/system/',
def service_is_active(container, sysdir=constants.SYSTEMD_DIR, log=None):
    """Report whether the systemd service of a container is fully set up.

    The service counts as active only when its unit file exists under
    ``sysdir`` and systemctl reports the unit as both enabled and active.

    :param container: container name; the unit is named after
                      ``tripleo_<container>``
    :param sysdir: directory holding the systemd unit files
    :param log: optional logger, also handed to the systemctl helpers
    :returns: True if the unit file exists and the unit is enabled and
              active, False otherwise
    """
    log = log or common.configure_logging(__name__)
    # prefix is explained in the service_create().
    service = 'tripleo_' + container
    sysd_unit_f = systemctl.format_name(service)
    # os.path.join keeps the path valid whether or not sysdir ends with '/'
    if not os.path.isfile(os.path.join(sysdir, sysd_unit_f)):
        return False
    # Forward the logger so the systemctl helpers log through the caller's
    # logger instead of configuring their own.
    return (systemctl.is_enabled(sysd_unit_f, log=log) and
            systemctl.is_active(sysd_unit_f, log=log))
def healthcheck_create(container, sysdir=constants.SYSTEMD_DIR,
log=None, test='/openstack/healthcheck'):
"""Create a healthcheck for a service in systemd
@ -256,3 +269,18 @@ WantedBy=timers.target""" % s_config)
except systemctl.SystemctlException:
log.exception("systemctl failed")
raise
def healthcheck_is_active(container, sysdir=constants.SYSTEMD_DIR,
                          log=None):
    """Report whether the healthcheck timer of a container is fully set up.

    The timer counts as active only when its unit file exists under
    ``sysdir`` and systemctl reports the timer as both enabled and active.

    :param container: container name; the timer unit is
                      ``tripleo_<container>_healthcheck.timer``
    :param sysdir: directory holding the systemd unit files
    :param log: optional logger, also handed to the systemctl helpers
    :returns: True if the timer file exists and the timer is enabled and
              active, False otherwise
    """
    log = log or common.configure_logging(__name__)

    service = 'tripleo_' + container
    healthcheck_timer = service + '_healthcheck.timer'
    # os.path.join keeps the path valid whether or not sysdir ends with '/'
    sysd_timer_f = os.path.join(sysdir, healthcheck_timer)
    if not os.path.isfile(sysd_timer_f):
        return False
    # Forward the logger so the systemctl helpers log through the caller's
    # logger instead of configuring their own.
    return (systemctl.is_enabled(healthcheck_timer, log=log)
            and systemctl.is_active(healthcheck_timer, log=log))