Ensure systemd services are activated
When deployment times out while running the paunch command, containers might be left in an incomplete state. Especially when podman is used and containers are managed by systemd, paunch sometimes succeeds in creating a container but is aborted before creating and enabling the systemd service for that container. This change makes paunch check the status of systemd services to detect any disabled/stopped/missing services, and remove containers with incomplete systemd services so that the containers and the associated systemd services are recreated and reconfigured. This reimplements commit 6ddd429488, which had several issues.
Conflicts: paunch/tests/test_utils_systemctl.py
Resolved conflict caused by b995b0ade8.
Closes-Bug: #1955056
Change-Id: Idc04b6a4276f0ac9e5a07af03c317e9f665eb6ce
(cherry picked from commit 483c0a3733)
This commit is contained in:
parent
dd8bcbc54c
commit
be899c9d95
|
@ -68,24 +68,40 @@ class BaseBuilder(object):
|
|||
desired_names = set([cn[-1] for cn in container_names])
|
||||
|
||||
for container in sorted(self.config, key=key_fltr):
|
||||
# Before creating the container, figure out if it needs to be
|
||||
# removed because of its configuration has changed.
|
||||
# If anything has been deleted, refresh the container_names/desired
|
||||
if self.delete_updated(container, container_names):
|
||||
container_names = self.runner.container_names(self.config_id)
|
||||
desired_names = set([cn[-1] for cn in container_names])
|
||||
|
||||
self.log.debug("Running container: %s" % container)
|
||||
cconfig = self.config[container]
|
||||
action = cconfig.get('action', 'run')
|
||||
restart = cconfig.get('restart', 'none')
|
||||
exit_codes = cconfig.get('exit_codes', [0])
|
||||
container_name = self.runner.unique_container_name(container)
|
||||
systemd_managed = (restart != 'none'
|
||||
and self.runner.cont_cmd == 'podman'
|
||||
and action == 'run')
|
||||
start_cmd = 'create' if systemd_managed else 'run'
|
||||
|
||||
force_delete = False
|
||||
if systemd_managed:
|
||||
if not systemd.service_is_active(container, log=self.log):
|
||||
self.log.debug("Service is not activated. "
|
||||
"Recreating the container.")
|
||||
force_delete = True
|
||||
if ((not self.healthcheck_disabled) and
|
||||
'healthcheck' in cconfig and
|
||||
not systemd.healthcheck_is_active(
|
||||
container=container, log=self.log)):
|
||||
self.log.debug("Healthcheck is not activated. "
|
||||
"Recreating the container.")
|
||||
force_delete = True
|
||||
|
||||
# Before creating the container, figure out if it needs to be
|
||||
# removed because of its configuration has changed.
|
||||
# This also removes containers with incomplete systemd services.
|
||||
# If anything has been deleted, refresh the container_names/desired
|
||||
if self.delete_updated(container, container_names, force_delete):
|
||||
container_names = self.runner.container_names(self.config_id)
|
||||
desired_names = set([cn[-1] for cn in container_names])
|
||||
|
||||
self.log.debug("Running container: %s" % container)
|
||||
container_name = self.runner.unique_container_name(container)
|
||||
|
||||
# When upgrading from Docker to Podman, we want to stop the
|
||||
# container that runs under Docker first before starting it with
|
||||
# Podman. The container will be removed later in THT during
|
||||
|
@ -203,12 +219,18 @@ class BaseBuilder(object):
|
|||
"%s" % container)
|
||||
return deleted
|
||||
|
||||
def delete_updated(self, container, container_names):
|
||||
def delete_updated(self, container, container_names, force=False):
|
||||
# If a container is not deployed, there is nothing to delete
|
||||
if (container not in
|
||||
list(itertools.chain.from_iterable(container_names))):
|
||||
return False
|
||||
|
||||
# delete the container when it is forced
|
||||
if force:
|
||||
self.log.debug("Deleting container (force): %s" % container)
|
||||
self.runner.remove_container(container)
|
||||
return True
|
||||
|
||||
# fetch container inspect info
|
||||
inspect_info = self.runner.inspect(container)
|
||||
if not inspect_info:
|
||||
|
|
|
@ -500,11 +500,10 @@ class PodmanRunner(BaseRunner):
|
|||
container)
|
||||
return False
|
||||
service_name = 'tripleo_' + container + '.service'
|
||||
try:
|
||||
systemctl.is_active(service_name)
|
||||
if systemctl.is_active(service_name):
|
||||
self.log.debug('Unit %s is running' % service_name)
|
||||
return True
|
||||
except systemctl.SystemctlException:
|
||||
else:
|
||||
chk_cmd = [
|
||||
self.cont_cmd,
|
||||
'ps',
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
# under the License.
|
||||
|
||||
import mock
|
||||
import subprocess
|
||||
|
||||
from paunch.tests import base
|
||||
from paunch.utils import systemctl
|
||||
|
@ -43,9 +44,34 @@ class TestUtilsSystemctl(base.TestCase):
|
|||
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
def test_is_active(self, mock_subprocess_check_call):
|
||||
systemctl.is_active('foo')
|
||||
self.assertTrue(systemctl.is_active('foo'))
|
||||
mock_subprocess_check_call.assert_has_calls([
|
||||
mock.call(['systemctl', 'is-active', '-q', 'foo']),
|
||||
mock.call(['systemctl', 'is-active', '-q', 'foo'])
|
||||
])
|
||||
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
def test_is_active_inactive(self, mock_subprocess_check_call):
|
||||
mock_subprocess_check_call.side_effect = \
|
||||
subprocess.CalledProcessError(1, 'error')
|
||||
self.assertFalse(systemctl.is_active('foo'))
|
||||
mock_subprocess_check_call.assert_has_calls([
|
||||
mock.call(['systemctl', 'is-active', '-q', 'foo'])
|
||||
])
|
||||
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
def test_is_enabled(self, mock_subprocess_check_call):
|
||||
self.assertTrue(systemctl.is_enabled('foo'))
|
||||
mock_subprocess_check_call.assert_has_calls([
|
||||
mock.call(['systemctl', 'is-enabled', '-q', 'foo'])
|
||||
])
|
||||
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
def test_is_enabled_disabled(self, mock_subprocess_check_call):
|
||||
mock_subprocess_check_call.side_effect = \
|
||||
subprocess.CalledProcessError(1, 'error')
|
||||
self.assertFalse(systemctl.is_enabled('foo'))
|
||||
mock_subprocess_check_call.assert_has_calls([
|
||||
mock.call(['systemctl', 'is-enabled', '-q', 'foo'])
|
||||
])
|
||||
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
import mock
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
from paunch.tests import base
|
||||
|
@ -111,6 +112,51 @@ class TestUtilsSystemd(base.TestCase):
|
|||
mock.call(os.path.join(tempdir, service_requires_d)),
|
||||
])
|
||||
|
||||
@mock.patch('os.path.isfile', autospec=True)
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
def test_service_is_active(self, mock_subprocess_check_call, mock_isfile):
|
||||
mock_isfile.return_value = True
|
||||
container = 'my_app'
|
||||
service = 'tripleo_' + container
|
||||
self.assertTrue(systemd.service_is_active(container))
|
||||
mock_isfile.assert_called_once_with(
|
||||
'/etc/systemd/system/' + service + '.service'
|
||||
)
|
||||
mock_subprocess_check_call.assert_has_calls([
|
||||
mock.call(['systemctl', 'is-enabled', '-q', service + '.service']),
|
||||
mock.call(['systemctl', 'is-active', '-q', service + '.service'])
|
||||
])
|
||||
|
||||
@mock.patch('os.path.isfile', autospec=True)
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
def test_service_is_active_file_not_exist(self, mock_subprocess_check_call,
|
||||
mock_isfile):
|
||||
mock_isfile.return_value = False
|
||||
container = 'my_app'
|
||||
service = 'tripleo_' + container
|
||||
self.assertFalse(systemd.service_is_active(container))
|
||||
mock_isfile.assert_called_once_with(
|
||||
'/etc/systemd/system/' + service + '.service'
|
||||
)
|
||||
self.assertEqual(0, mock_subprocess_check_call.call_count)
|
||||
|
||||
@mock.patch('os.path.isfile', autospec=True)
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
def test_service_is_active_stopped(self, mock_subprocess_check_call,
|
||||
mock_isfile):
|
||||
mock_subprocess_check_call.side_effect = \
|
||||
subprocess.CalledProcessError(1, 'error')
|
||||
mock_isfile.return_value = True
|
||||
container = 'my_app'
|
||||
service = 'tripleo_' + container
|
||||
self.assertFalse(systemd.service_is_active(container))
|
||||
mock_isfile.assert_called_once_with(
|
||||
'/etc/systemd/system/' + service + '.service'
|
||||
)
|
||||
mock_subprocess_check_call.assert_has_calls([
|
||||
mock.call(['systemctl', 'is-enabled', '-q', service + '.service'])
|
||||
])
|
||||
|
||||
@mock.patch('os.chmod')
|
||||
def test_healthcheck_create(self, mock_chmod):
|
||||
container = 'my_app'
|
||||
|
@ -165,3 +211,50 @@ class TestUtilsSystemd(base.TestCase):
|
|||
healthcheck_timer]),
|
||||
mock.call(['systemctl', 'daemon-reload']),
|
||||
])
|
||||
|
||||
@mock.patch('os.path.isfile', autospec=True)
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
def test_healthcheck_is_active(self, mock_subprocess_check_call,
|
||||
mock_isfile):
|
||||
mock_isfile.return_value = True
|
||||
container = 'my_app'
|
||||
healthcheck_timer = 'tripleo_' + container + '_healthcheck.timer'
|
||||
self.assertTrue(systemd.healthcheck_is_active(container))
|
||||
mock_isfile.assert_called_once_with(
|
||||
'/etc/systemd/system/' + healthcheck_timer
|
||||
)
|
||||
mock_subprocess_check_call.assert_has_calls([
|
||||
mock.call(['systemctl', 'is-enabled', '-q', healthcheck_timer]),
|
||||
mock.call(['systemctl', 'is-active', '-q', healthcheck_timer])
|
||||
])
|
||||
|
||||
@mock.patch('os.path.isfile', autospec=True)
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
def test_healthcheck_is_active_file_not_exist(self,
|
||||
mock_subprocess_check_call,
|
||||
mock_isfile):
|
||||
mock_isfile.return_value = False
|
||||
container = 'my_app'
|
||||
healthcheck_timer = 'tripleo_' + container + '_healthcheck.timer'
|
||||
self.assertFalse(systemd.healthcheck_is_active(container))
|
||||
mock_isfile.assert_called_once_with(
|
||||
'/etc/systemd/system/' + healthcheck_timer
|
||||
)
|
||||
self.assertEqual(0, mock_subprocess_check_call.call_count)
|
||||
|
||||
@mock.patch('os.path.isfile', autospec=True)
|
||||
@mock.patch('subprocess.check_call', autospec=True)
|
||||
def test_healthcheck_is_active_stopped(self, mock_subprocess_check_call,
|
||||
mock_isfile):
|
||||
mock_subprocess_check_call.side_effect = \
|
||||
subprocess.CalledProcessError(1, 'error')
|
||||
mock_isfile.return_value = True
|
||||
container = 'my_app'
|
||||
healthcheck_timer = 'tripleo_' + container + '_healthcheck.timer'
|
||||
self.assertFalse(systemd.healthcheck_is_active(container))
|
||||
mock_isfile.assert_called_once_with(
|
||||
'/etc/systemd/system/' + healthcheck_timer
|
||||
)
|
||||
mock_subprocess_check_call.assert_has_calls([
|
||||
mock.call(['systemctl', 'is-enabled', '-q', healthcheck_timer])
|
||||
])
|
||||
|
|
|
@ -51,7 +51,19 @@ def reset_failed(service, log=None):
|
|||
|
||||
|
||||
def is_active(service, log=None):
    """Report whether a systemd unit is currently active.

    Runs ``systemctl is-active -q <service>`` through this module's
    ``systemctl`` helper and turns the outcome into a boolean instead of
    letting the failure propagate.

    :param service: unit name passed to ``systemctl is-active``
    :param log: optional logger forwarded to the ``systemctl`` helper
    :returns: True when the command succeeds, False when the helper
              raises SystemctlException
    """
    try:
        systemctl(['is-active', '-q', service], log)
    except SystemctlException:
        # A failed query is the answer "not active", not an error.
        return False
    return True
|
||||
|
||||
|
||||
def is_enabled(service, log=None):
    """Report whether a systemd unit is enabled.

    Runs ``systemctl is-enabled -q <service>`` through this module's
    ``systemctl`` helper.

    :param service: unit name passed to ``systemctl is-enabled``
    :param log: optional logger forwarded to the ``systemctl`` helper
    :returns: True when the command succeeds, False when the helper
              raises SystemctlException
    """
    try:
        systemctl(['is-enabled', '-q', service], log)
    except SystemctlException:
        enabled = False
    else:
        enabled = True
    return enabled
|
||||
|
||||
|
||||
# NOTE(bogdando): this implements a crash-loop with reset-failed
|
||||
|
|
|
@ -163,7 +163,20 @@ def service_delete(container, sysdir=constants.SYSTEMD_DIR, log=None):
|
|||
shutil.rmtree(os.path.join(sysdir, sysd_health_req_d))
|
||||
|
||||
|
||||
def healthcheck_create(container, sysdir='/etc/systemd/system/',
|
||||
def service_is_active(container, sysdir=constants.SYSTEMD_DIR, log=None):
    """Check that the systemd service of a container is fully set up.

    The service counts as active only when its unit file exists under
    ``sysdir`` and systemd reports the unit as both enabled and active.

    :param container: container name; the unit is ``tripleo_<container>``
                      formatted by ``systemctl.format_name``
    :param sysdir: directory holding the systemd unit files
    :param log: optional logger; one is configured when omitted
    :returns: True when the unit file exists and the unit is enabled and
              active, False otherwise
    """
    log = log or common.configure_logging(__name__)
    # prefix is explained in the service_create().
    service = 'tripleo_' + container

    sysd_unit_f = systemctl.format_name(service)
    # os.path.join keeps the path valid whether or not sysdir ends with
    # a separator.
    if not os.path.isfile(os.path.join(sysdir, sysd_unit_f)):
        return False

    # Forward the resolved logger so the systemctl helpers report through
    # the caller's logger instead of dropping it.
    return (systemctl.is_enabled(sysd_unit_f, log=log) and
            systemctl.is_active(sysd_unit_f, log=log))
|
||||
|
||||
|
||||
def healthcheck_create(container, sysdir=constants.SYSTEMD_DIR,
|
||||
log=None, test='/openstack/healthcheck'):
|
||||
"""Create a healthcheck for a service in systemd
|
||||
|
||||
|
@ -256,3 +269,18 @@ WantedBy=timers.target""" % s_config)
|
|||
except systemctl.SystemctlException:
|
||||
log.exception("systemctl failed")
|
||||
raise
|
||||
|
||||
|
||||
def healthcheck_is_active(container, sysdir=constants.SYSTEMD_DIR,
                          log=None):
    """Check that the healthcheck timer of a container is fully set up.

    The timer counts as active only when
    ``tripleo_<container>_healthcheck.timer`` exists under ``sysdir`` and
    systemd reports it as both enabled and active.

    :param container: container name used to derive the timer unit name
    :param sysdir: directory holding the systemd unit files
    :param log: optional logger; one is configured when omitted
    :returns: True when the timer file exists and the timer is enabled
              and active, False otherwise
    """
    log = log or common.configure_logging(__name__)

    service = 'tripleo_' + container
    healthcheck_timer = service + '_healthcheck.timer'
    # os.path.join keeps the path valid whether or not sysdir ends with
    # a separator.
    sysd_timer_f = os.path.join(sysdir, healthcheck_timer)

    if not os.path.isfile(sysd_timer_f):
        return False

    # Forward the resolved logger so the systemctl helpers report through
    # the caller's logger instead of dropping it.
    return (systemctl.is_enabled(healthcheck_timer, log=log)
            and systemctl.is_active(healthcheck_timer, log=log))
|
||||
|
|
Loading…
Reference in New Issue