Merge "Introduce an Action Plugin to manage systemd services for containers" into stable/train

This commit is contained in:
Zuul 2020-07-22 17:26:31 +00:00 committed by Gerrit Code Review
commit 6485036bc0
9 changed files with 443 additions and 154 deletions

View File

@ -0,0 +1,406 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2020 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
__metaclass__ = type
import copy
import os
import tenacity
import yaml
from ansible.errors import AnsibleActionFail
from ansible.plugins.action import ActionBase
from ansible.utils.display import Display
# Module-level Ansible display handle; the plugin emits its debug messages
# through it when the ``debug`` option is enabled.
DISPLAY = Display()
# Ansible module documentation. NOTE: this YAML is also parsed at import time
# by the action plugin (see ``_VALID_ARGS``) to obtain option names and
# defaults, so the nesting below is functional, not cosmetic: every option
# must be a child of ``options`` and an option without a ``default`` key is
# treated as required.
DOCUMENTATION = """
module: container_systemd
author:
  - "TripleO team"
version_added: '2.9'
short_description: Create systemd files and manage services to run containers
notes: []
description:
  - Manage the systemd unit files for containers with a restart policy and
    then make sure the services are started so the containers are running.
    It takes the container config data as input to figure out how the unit
    files will be configured. It returns a list of services that were
    restarted.
requirements:
  - None
options:
  container_config:
    description:
      - List of container configurations
    type: list
    elements: dict
    required: true
  systemd_healthchecks:
    default: true
    description:
      - Whether or not the systemd healthcheck services and timers of the
        containers are managed.
    type: bool
  restart_containers:
    description:
      - List of container names to be restarted
    default: []
    type: list
    elements: str
  debug:
    default: false
    description:
      - Whether or not debug is enabled.
    type: bool
"""

# Usage example rendered by ansible-doc.
EXAMPLES = """
- name: Manage container systemd services
  container_systemd:
    container_config:
      - keystone:
          image: quay.io/tripleo/keystone
          restart: always
      - mysql:
          image: quay.io/tripleo/mysql
          stop_grace_period: 25
          restart: always
"""

# Description of the values returned by the plugin.
RETURN = """
restarted:
  description: List of services that were restarted
  returned: always
  type: list
  sample:
    - tripleo_keystone.service
    - tripleo_mysql.service
"""
class ActionModule(ActionBase):
    """Action plugin managing the systemd units that run TripleO containers.

    For every container of ``container_config`` the plugin renders a systemd
    service unit (and, when healthchecks are enabled, a healthcheck service
    and timer), installs it under ``/etc/systemd/system``, reloads systemd
    when something changed and (re)starts the affected units.
    """

    # Option specification parsed from the module documentation, so that the
    # documented defaults and the defaults applied at runtime are one and the
    # same. An option without a ``default`` key is considered required.
    _VALID_ARGS = yaml.safe_load(DOCUMENTATION)['options']

    def _get_args(self):
        """Collect the task arguments and apply the documented defaults.

        :returns args: Dictionary mapping every option to its value.
        :raises AnsibleActionFail: when a required option is not provided.
        """
        missing = []
        args = {}
        for option, vals in self._VALID_ARGS.items():
            if 'default' in vals:
                args[option] = self._task.args.get(option, vals['default'])
                continue
            # No default documented: the option is mandatory.
            value = self._task.args.get(option, None)
            if value is None:
                missing.append(option)
            else:
                args[option] = value
        if missing:
            raise AnsibleActionFail('Missing required parameters: {}'.format(
                ', '.join(missing)))
        return args

    @staticmethod
    def _fail_on_error(results, action):
        """Raise if a module execution reported a failure.

        The playbook tasks this plugin replaces failed the play whenever one
        of their modules failed; ignoring the ``failed`` flag here would only
        postpone the error to a later, less explicit one.

        :param results: Dictionary returned by ``_execute_module``.
        :param action: String describing what was attempted.
        :raises AnsibleActionFail: when ``results`` is flagged as failed.
        """
        if results.get('failed', False):
            raise AnsibleActionFail('Failed to {}: {}'.format(
                action, results.get('msg', results)))

    @staticmethod
    def _unique(items):
        """Return ``items`` without duplicates, keeping first-seen order.

        :param items: List of hashable items.
        :returns ordered: List of the distinct items.
        """
        seen = set()
        ordered = []
        for item in items:
            if item not in seen:
                seen.add(item)
                ordered.append(item)
        return ordered

    def _cleanup_requires(self, container_names, task_vars):
        """Cleanup systemd requires files.

        :param container_names: List of container names.
        :param task_vars: Dictionary of Ansible task variables.
        :raises AnsibleActionFail: when a file can not be removed.
        """
        for name in container_names:
            path = "/etc/systemd/system/tripleo_{}.requires".format(name)
            if self.debug:
                DISPLAY.display('Removing {} file'.format(path))
            results = self._execute_module(
                module_name='file',
                module_args=dict(path=path, state='absent'),
                task_vars=task_vars
            )
            self._fail_on_error(results, 'remove {}'.format(path))
            if results.get('changed', False):
                self.changed = True

    def _get_template(self, template):
        """Return systemd unit template data

        :param template: template file with its extension
        :returns data: Template data.
        :raises AnsibleActionFail: when the template file does not exist.
        """
        if self._task._role:
            file_path = self._task._role._role_path
        else:
            file_path = self._loader.get_basedir()
        # NOTE: when the "templates" directory doesn't exist, the lookup
        # falls back to file_path/<template>. The template is required to be
        # shipped by the tripleo_container_manage role, as there is no
        # parameter to override it for now.
        source = self._loader.path_dwim_relative(
            file_path,
            'templates',
            template
        )
        if not os.path.exists(source):
            raise AnsibleActionFail('Template {} was '
                                    'not found'.format(source))
        with open(source) as template_file:
            data = template_file.read()
        return data

    def _create_systemd_file(self, path, data, task_vars):
        """Install a rendered unit file on the managed host.

        :param path: Destination path of the unit file.
        :param data: Path of the already transferred source file.
        :param task_vars: Dictionary of Ansible task variables.
        :returns sysd_file: Dictionary of results of the copy module.
        :raises AnsibleActionFail: when the file can not be installed.
        """
        sysd_file = self._execute_module(
            module_name='copy',
            module_args=dict(src=data,
                             dest=path,
                             mode='0644',
                             owner='root',
                             group='root'),
            task_vars=task_vars)
        self._fail_on_error(sysd_file, 'create {}'.format(path))
        return sysd_file

    def _make_remote_tmp(self, task_vars):
        """Create the remote temporary directory used to stage unit files.

        :param task_vars: Dictionary of Ansible task variables.
        :returns tmp: Path of the remote temporary directory.
        """
        try:
            remote_user = self._get_remote_user()
        except Exception:
            # Best effort: fall back on the connection variables when the
            # helper is unavailable or fails.
            remote_user = task_vars.get('ansible_user')
            if not remote_user:
                remote_user = self._play_context.remote_user
        return self._make_tmp_path(remote_user)

    def _render(self, template, container, task_vars):
        """Render a template for one container.

        The templates read the container through the ``container_data``
        variable, which is only exposed for the duration of the rendering.

        :param template: String of template data.
        :param container: Dictionary for the container config.
        :param task_vars: Dictionary of Ansible task variables.
        :returns: String of the rendered template.
        """
        task_vars['container_data'] = container
        try:
            return self._templar.template(template,
                                          preserve_trailing_newlines=True,
                                          escape_backslashes=False,
                                          convert_data=False)
        finally:
            # Never leak the temporary variable, even if rendering fails.
            del task_vars['container_data']

    def _install_unit(self, template, container, dest, tmp, task_vars):
        """Render, transfer and install one systemd file.

        :param template: String of template data.
        :param container: Dictionary for the container config.
        :param dest: Destination path of the systemd file.
        :param tmp: Remote temporary directory used for staging.
        :param task_vars: Dictionary of Ansible task variables.
        :returns results: Dictionary of results of the copy module.
        """
        rendered = self._render(template, container, task_vars)
        remote_data = self._transfer_data(
            self._connection._shell.join_path(tmp, 'source'), rendered)
        return self._create_systemd_file(dest, remote_data, task_vars)

    def _manage_units(self, container_config, task_vars):
        """Create systemd units and get list of changed services

        :param container_config: List of dictionaries for container configs.
        :param task_vars: Dictionary of Ansible task variables.
        :returns changed_containers: List of containers which has a new unit.
        """
        tmp = self._make_remote_tmp(task_vars)
        unit_template = self._get_template('systemd-service.j2')
        changed_containers = []
        for container in container_config:
            for name in container:
                dest = '/etc/systemd/system/tripleo_{}.service'.format(name)
                results = self._install_unit(unit_template, container, dest,
                                             tmp, task_vars)
                if results.get('changed', False):
                    changed_containers.append(name)
        if self.debug:
            DISPLAY.display('Systemd unit files were created or updated for: '
                            '{}'.format(changed_containers))
        return changed_containers

    def _manage_healthchecks(self, container_config, task_vars):
        """Create systemd healthchecks and get list of changed services

        :param container_config: List of dictionaries for container configs.
        :param task_vars: Dictionary of Ansible task variables.
        :returns changed_healthchecks: List of healthchecks which changed.
        """
        tmp = self._make_remote_tmp(task_vars)
        # Templates are read lazily and only once: they are not needed (and
        # not required to exist) when no container defines a healthcheck.
        templates = {}
        changed_healthchecks = []
        for container in container_config:
            for name, config in container.items():
                if 'healthcheck' not in config:
                    continue
                for t in ['service', 'timer']:
                    if t not in templates:
                        templates[t] = self._get_template(
                            'systemd-healthcheck-{}.j2'.format(t))
                    dest = ('/etc/systemd/system/tripleo_{}_'
                            'healthcheck.{}'.format(name, t))
                    results = self._install_unit(templates[t], container,
                                                 dest, tmp, task_vars)
                    if results.get('changed', False) and (
                            name not in changed_healthchecks):
                        changed_healthchecks.append(name)
        if self.debug:
            DISPLAY.display('Systemd healthcheck was created or updated for:'
                            ' {}'.format(changed_healthchecks))
        return changed_healthchecks

    def _systemd_reload(self, task_vars):
        """Reload systemd to load new units.

        :param task_vars: Dictionary of Ansible task variables.
        :raises AnsibleActionFail: when the daemon reload fails.
        """
        if self.debug:
            DISPLAY.display('Running systemd daemon reload')
        results = self._execute_module(
            module_name='systemd',
            module_args=dict(daemon_reload=True),
            task_vars=task_vars
        )
        self._fail_on_error(results, 'reload systemd')
        if results.get('changed', False):
            self.changed = True

    def _add_systemd_requires(self, services, task_vars):
        """Add systemd dependencies for healthchecks.

        :param services: List for service names.
        :param task_vars: Dictionary of Ansible task variables.
        :raises AnsibleActionFail: when a dependency can not be added.
        """
        for name in services:
            service = 'tripleo_{}'.format(name)
            if self.debug:
                DISPLAY.display('Adding systemd dependency for '
                                '{}'.format(service))
            command = ('systemctl add-requires {}.service '.format(service)
                       + '{}_healthcheck.timer'.format(service))
            results = self._execute_module(
                module_name='command',
                module_args=dict(cmd=command),
                task_vars=task_vars
            )
            self._fail_on_error(
                results, 'add systemd requires for {}'.format(service))
            if results.get('changed', False):
                self.changed = True

    @tenacity.retry(
        reraise=True,
        stop=tenacity.stop_after_attempt(5),
        wait=tenacity.wait_fixed(5)
    )
    def _restart_service(self, name, extension, task_vars):
        """Restart a systemd service with retries and delay.

        Any exception raised here triggers a new attempt; after the last
        attempt the exception is re-raised to the caller.

        :param name: String for service name to restart.
        :param extension: String for service to restart.
        :param task_vars: Dictionary of Ansible task variables.
        :raises AnsibleActionFail: when the unit did not report a successful
                                   result.
        """
        unit = 'tripleo_{}.{}'.format(name, extension)
        tvars = copy.deepcopy(task_vars)
        results = self._execute_module(
            module_name='systemd',
            module_args=dict(state='restarted',
                             name=unit,
                             enabled=True,
                             daemon_reload=False),
            task_vars=tvars
        )
        # When the systemd module fails, the 'status' key is absent from the
        # results; treat that as "not started yet" so the retry logic and
        # the final error stay meaningful instead of surfacing a KeyError.
        status = results.get('status') or {}
        if isinstance(status, dict) and status.get('Result') == 'success':
            if results.get('changed', False):
                self.changed = True
                self.restarted.append(unit)
            return
        raise AnsibleActionFail('Service {} has not started yet'.format(name))

    def _restart_services(self, service_names, task_vars, extension='service'):
        """Restart systemd services.

        :param service_names: List of services to restart.
        :param extension: String for service to restart.
        :param task_vars: Dictionary of Ansible task variables.
        """
        for name in service_names:
            if self.debug:
                DISPLAY.display('Restarting systemd service for '
                                '{}'.format(name))
            self._restart_service(name, extension, task_vars)

    def _add_requires(self, services, task_vars):
        """Add systemd requires for healthchecks.

        Placeholder kept for compatibility: it currently does nothing, the
        dependencies are added by ``_add_systemd_requires``.

        :param services: List of services to manage.
        :param task_vars: Dictionary of Ansible task variables.
        """
        for s in services:
            # TODO
            pass

    def run(self, tmp=None, task_vars=None):
        """Create the systemd files and (re)start the container services.

        :param tmp: Unused, only forwarded to the parent class.
        :param task_vars: Dictionary of Ansible task variables.
        :returns result: Dictionary of results, with the ``changed`` flag and
                         the ``restarted`` list of unit names.
        """
        self.changed = False
        self.restarted = []
        if task_vars is None:
            task_vars = dict()
        result = super(ActionModule, self).run(tmp, task_vars)
        del tmp

        # parse args
        args = self._get_args()
        container_config = args['container_config']
        restart_containers = args['restart_containers']
        self.systemd_healthchecks = args['systemd_healthchecks']
        self.debug = args['debug']

        # Containers explicitly requested for a restart are only restarted
        # when a unit file already exists for them.
        extra_restarts = []
        for c in restart_containers:
            s_path = os.path.join('/etc/systemd/system',
                                  'tripleo_{}.service'.format(c))
            service_stat = self._execute_module(
                module_name='stat',
                module_args=dict(path=s_path),
                task_vars=task_vars
            )
            if service_stat.get('stat', {}).get('exists', False):
                if self.debug:
                    DISPLAY.display('Systemd unit file found for {}, the '
                                    'container will be restarted'.format(c))
                extra_restarts.append(c)

        container_names = [name
                           for container in container_config
                           for name in container]
        self._cleanup_requires(container_names, task_vars)

        changed_services = self._manage_units(container_config, task_vars)
        if changed_services:
            self._systemd_reload(task_vars)
        # Deduplicate while keeping the order, so that the services are
        # restarted (and reported) in a deterministic order.
        service_names = self._unique(changed_services + extra_restarts)
        self._restart_services(service_names, task_vars)

        if self.systemd_healthchecks:
            changed_healthchecks = self._manage_healthchecks(container_config,
                                                             task_vars)
            healthchecks_to_restart = [h + '_healthcheck'
                                       for h in changed_healthchecks]
            if healthchecks_to_restart:
                self._systemd_reload(task_vars)
            self._restart_services(healthchecks_to_restart,
                                   task_vars,
                                   extension='timer')
            self._add_systemd_requires(changed_healthchecks, task_vars)

        result['changed'] = self.changed
        result['restarted'] = self.restarted
        return result

View File

@ -21,10 +21,14 @@
loop: "{{ all_containers_hash | subsort(attribute='start_order', null_value=0) | dict2items | list }}"
- name: "Manage container systemd services and healthchecks for {{ tripleo_container_manage_config }}"
include_tasks: podman/systemd.yml
become: true
container_systemd:
container_config: "{{ container_config }}"
debug: "{{ tripleo_container_manage_debug | bool }}"
restart_containers: "{{ containers_changed | default([]) }}"
systemd_healthchecks: "{{ (not tripleo_container_manage_healthcheck_disabled | bool) }}"
vars:
container_config: "{{ all_containers_hash | dict_to_list | haskey(attribute='restart', value=['always','unless-stopped'], any=True) | default([]) }}"
container_config_healthcheck: "{{ all_containers_hash | dict_to_list | haskey(attribute='healthcheck') | intersect(container_config) | default([]) }}"
when:
- tripleo_container_manage_cli == 'podman'
- (container_config|length) > 0

View File

@ -1,122 +0,0 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
- name: "Manage systemd files"
no_log: "{{ not tripleo_container_manage_debug }}"
block:
- name: "Remove trailing .requires"
file:
path: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_requires).key }}.requires"
state: absent
loop: "{{ container_config }}"
loop_control:
loop_var: container_data_requires
- name: "Create systemd services files"
template:
src: systemd-service.j2
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_unit).key }}.service"
mode: '0644'
owner: root
group: root
register: systemd_file
loop: "{{ container_config }}"
loop_control:
loop_var: container_data_unit
- name: "Create systemd healthcheck files"
when:
- not tripleo_container_manage_healthcheck_disabled
- (container_config_healthcheck | length) > 0
block:
- name: "Create systemd unit files healthchecks"
template:
src: systemd-healthcheck.j2
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_healthcheck).key }}_healthcheck.service"
mode: '0644'
owner: root
group: root
register: systemd_healthcheck
loop: "{{ container_config_healthcheck }}"
loop_control:
loop_var: container_data_healthcheck
- name: "Create systemd timers for healthchecks"
template:
src: systemd-timer.j2
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_timer).key }}_healthcheck.timer"
mode: '0644'
owner: root
group: root
register: systemd_timer
loop: "{{ container_config_healthcheck }}"
loop_control:
loop_var: container_data_timer
- name: Create fact for container_systemd_changes
set_fact:
container_systemd_changes: >-
{{ ((systemd_file|get_changed_async_task_names) + (systemd_healthcheck|get_changed_async_task_names) +
(systemd_timer|get_changed_async_task_names)|default([])|unique) }}
- name: "Force systemd daemon reload if a systemd file changed"
systemd:
daemon_reload: true
when:
- (container_systemd_changes | length) > 0
- name: Create fact for container_systemd_restart_list
set_fact:
container_systemd_services_restart_list: "{{ (systemd_file|get_changed_async_task_names(extra=containers_changed|default([]))) }}"
container_systemd_timers_restart_list: "{{ (systemd_timer|get_changed_async_task_names(extra=containers_changed|default([]))) }}"
- name: "Start or restart systemd services"
systemd:
# Restart the service if it was already running
state: restarted
name: "tripleo_{{ container_sysd_name }}.service"
enabled: true
daemon_reload: false
loop: "{{ container_systemd_services_restart_list }}"
loop_control:
loop_var: container_sysd_name
register: systemd_service_enable
until: (systemd_service_enable.status is defined) and (systemd_service_enable.status.Result == "success")
retries: 5
delay: 5
- name: "Enable and start systemd timers"
systemd:
# Restart the timer if it was already running
state: restarted
name: "tripleo_{{ container_timer_name }}_healthcheck.timer"
enabled: true
daemon_reload: false
loop: "{{ container_systemd_timers_restart_list }}"
loop_control:
loop_var: container_timer_name
when:
- not tripleo_container_manage_healthcheck_disabled
register: systemd_healthcheck_enable
until: (systemd_healthcheck_enable.status is defined) and (systemd_healthcheck_enable.status.Result == "success")
retries: 5
delay: 5
- name: "Add systemd requires for healthchecks"
command: "systemctl add-requires tripleo_{{ container_requires_timer_name }}.service tripleo_{{ container_requires_timer_name }}_healthcheck.timer"
loop: "{{ container_systemd_timers_restart_list }}"
loop_control:
loop_var: container_requires_timer_name
when:
- not tripleo_container_manage_healthcheck_disabled

View File

@ -0,0 +1,10 @@
[Unit]
Description=tripleo_{{ lookup('dict', container_data).key }} healthcheck
After=tripleo-container-shutdown.service tripleo_{{ lookup('dict', container_data).key }}.service
Requisite=tripleo_{{ lookup('dict', container_data).key }}.service
[Service]
Type=oneshot
ExecStart=/usr/bin/podman exec --user root {{ lookup('dict', container_data).key }} {{ lookup('dict', container_data).value.healthcheck.test }}
SyslogIdentifier=healthcheck_{{ lookup('dict', container_data).key }}
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,9 @@
[Unit]
Description=tripleo_{{ lookup('dict', container_data).key }} container healthcheck
PartOf=tripleo_{{ lookup('dict', container_data).key }}.service
[Timer]
OnActiveSec=120
OnUnitActiveSec={{ lookup('dict', container_data).value.check_interval | default(60) }}
RandomizedDelaySec={{ 45 if lookup('dict', container_data).value.check_interval is not defined else (lookup('dict', container_data).value.check_interval * 3 / 4) | int | abs }}
[Install]
WantedBy=timers.target

View File

@ -1,10 +0,0 @@
[Unit]
Description=tripleo_{{ lookup('dict', container_data_healthcheck).key }} healthcheck
After=tripleo-container-shutdown.service tripleo_{{ lookup('dict', container_data_healthcheck).key }}.service
Requisite=tripleo_{{ lookup('dict', container_data_healthcheck).key }}.service
[Service]
Type=oneshot
ExecStart=/usr/bin/podman exec --user root {{ lookup('dict', container_data_healthcheck).key }} {{ lookup('dict', container_data_healthcheck).value.healthcheck.test }}
SyslogIdentifier=healthcheck_{{ lookup('dict', container_data_healthcheck).key }}
[Install]
WantedBy=multi-user.target

View File

@ -1,22 +1,22 @@
[Unit]
Description={{ lookup('dict', container_data_unit).key }} container
Description={{ lookup('dict', container_data).key }} container
After=tripleo-container-shutdown.service
Wants={{ lookup('dict', container_data_unit).value.depends_on | default([]) | join(',') }}
Wants={{ lookup('dict', container_data).value.depends_on | default([]) | join(',') }}
[Service]
Restart=always
{% if lookup('dict', container_data_unit).value.depends_on is defined and (lookup('dict', container_data_unit).value.depends_on | length > 0) and podman_drop_in | default('false') %}
ExecStart=/usr/libexec/tripleo-start-podman-container {{ lookup('dict', container_data_unit).key }}
{% if lookup('dict', container_data).value.depends_on is defined and (lookup('dict', container_data).value.depends_on | length > 0) and podman_drop_in | default('false') %}
ExecStart=/usr/libexec/tripleo-start-podman-container {{ lookup('dict', container_data).key }}
{% else %}
ExecStart=/usr/bin/podman start {{ lookup('dict', container_data_unit).key }}
ExecStart=/usr/bin/podman start {{ lookup('dict', container_data).key }}
{% endif %}
ExecReload=/usr/bin/podman kill --signal HUP {{ lookup('dict', container_data_unit).key }}
ExecStop=/usr/bin/podman stop -t {{ lookup('dict', container_data_unit).value.stop_grace_period | default(10) | int }} {{ lookup('dict', container_data_unit).key }}
ExecStopPost=/usr/bin/podman stop -t {{ lookup('dict', container_data_unit).value.stop_grace_period | default(10) | int }} {{ lookup('dict', container_data_unit).key }}
ExecReload=/usr/bin/podman kill --signal HUP {{ lookup('dict', container_data).key }}
ExecStop=/usr/bin/podman stop -t {{ lookup('dict', container_data).value.stop_grace_period | default(10) | int }} {{ lookup('dict', container_data).key }}
ExecStopPost=/usr/bin/podman stop -t {{ lookup('dict', container_data).value.stop_grace_period | default(10) | int }} {{ lookup('dict', container_data).key }}
KillMode=none
Type=forking
PIDFile=/var/run/{{ lookup('dict', container_data_unit).key }}.pid
{% if lookup('dict', container_data_unit).value.systemd_exec_flags is defined %}
{% for s_flag, s_value in lookup('dict', container_data_unit).value.systemd_exec_flags.items() %}
PIDFile=/var/run/{{ lookup('dict', container_data).key }}.pid
{% if lookup('dict', container_data).value.systemd_exec_flags is defined %}
{% for s_flag, s_value in lookup('dict', container_data).value.systemd_exec_flags.items() %}
{{ s_flag }}={{ s_value }}
{% endfor %}
{% endif %}

View File

@ -1,9 +0,0 @@
[Unit]
Description=tripleo_{{ lookup('dict', container_data_timer).key }} container healthcheck
PartOf=tripleo_{{ lookup('dict', container_data_timer).key }}.service
[Timer]
OnActiveSec=120
OnUnitActiveSec={{ lookup('dict', container_data_timer).value.check_interval | default(60) }}
RandomizedDelaySec={{ 45 if lookup('dict', container_data_timer).value.check_interval is not defined else (lookup('dict', container_data_timer).value.check_interval * 3 / 4) | int | abs }}
[Install]
WantedBy=timers.target

View File

@ -363,6 +363,7 @@
- ^tripleo_ansible/roles/tripleo-container-manage/.*
- ^tripleo_ansible/roles/tripleo-container-rm/.*
- ^tripleo_ansible/ansible_plugins/action/container_status.py$
- ^tripleo_ansible/ansible_plugins/action/container_systemd.py$
- ^tripleo_ansible/ansible_plugins/filter/helpers.py$
- ^tripleo_ansible/ansible_plugins/modules/container_config_data.py$
- ^tripleo_ansible/ansible_plugins/modules/container_puppet_config.py$