Combined backport to fix issues and enhance efficiency

The LXC host role can be tuned up for better overall efficiency.

Highlights:
* Move async wait to a later position for role performance. The
  async wait we're doing can be moved elsewhere in the role so
  that we're able to do more in parallel. This change simply moves
  the async wait to a position just before it's required.
* Move container creation tasks into their own sub-files which are
  accessed using dynamic routing.
* Several syntactic items were cleaned up.
* All of the basic cache cleanup has been moved to handlers.

These changes further optimise the lxc_host role so that it's using more
of the built in modules and making better use of handlers.

Moving the dnsmasq process to a unit file gives operators the ability to
restart the dnsmasq process if there's an issue with the service. It
also ensures the service stays running as systemd will take better care
of the service by isolating it within a specific cgroup, ensuring good
reporting and memory management, and providing the ability to recover
from failures in an automated way.

Closes-Bug: #1718979
Closes-Bug: #1518485
(cherry picked from commit 076493d014)
(cherry picked from commit 53a6cce9ed)

Change-Id: If7dfbae19429cb033d7fd7e33f1423627f091534
This commit is contained in:
Kevin Carter 2017-10-30 20:54:12 -05:00 committed by Kevin Carter (cloudnull)
parent a7f49c9a65
commit be93ac8d3f
17 changed files with 304 additions and 169 deletions

View File

@ -27,11 +27,20 @@
- name: Init reload
command: "initctl reload-configuration"
- name: Restart dbus
service:
name: "dbus"
state: "reloaded"
- name: Restart machined
command: "systemctl restart systemd-machined.service"
- name: Restart irqbalance
service:
name: "irqbalance"
state: "restarted"
enabled: "yes"
daemon_reload: yes
- name: Restart bridge
shell: "ifdown {{ lxc_net_bridge }} || true"
@ -51,3 +60,29 @@
# parameter can be removed when we move to Ansible 2.4
name: it_does_not_matter
daemon_reload: yes
- name: Remove generated apt keys from LXC host
file:
path: /root/repo.keys
state: absent
- name: Remove rootfs archive
file:
path: "/tmp/rootfs.tar.xz"
state: "absent"
- name: Remove metadata archive
file:
path: "/tmp/meta.tar.xz"
state: "absent"
- name: Restart dnsmasq
service:
name: "lxc-dnsmasq"
state: "restarted"
enabled: "yes"
daemon_reload: yes
register: _lxc_dnsmasq_service
until: _lxc_dnsmasq_service | success
retries: 5
delay: 5

View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
- name : Check cached image status
- name: Check cached image status
command: "machinectl image-status {{ lxc_container_base_name }}"
register: cache_check
changed_when: false

View File

@ -60,81 +60,29 @@
src: "/tmp/meta.tar.xz"
dest: "{{ cache_path_fact }}"
remote_src: True
- name: Remove metadata archive
file:
path: "/tmp/meta.tar.xz"
state: "absent"
notify:
- Remove metadata archive
- name: Set cache expiry
shell: "date -d @{{ (cache_time | int) + 31536000 }} > {{ cache_path_fact }}/expiry"
shell: "date -d @{{ (cache_time | int) + 31536000 }}"
changed_when: false
register: _cache_expiry
tags:
- skip_ansible_lint
- name: Set expiry
copy:
content: "{{ _cache_expiry.stdout }}"
dest: "{{ cache_path_fact }}/expiry"
- name: Set build ID
shell: "echo {{ cache_time }} > {{ cache_path_fact }}/build_id"
tags:
- skip_ansible_lint
copy:
content: "{{ cache_time }}"
dest: "{{ cache_path_fact }}/build_id"
- name: Create base container to use for overlayfs containers
lxc_container:
name: "{{ lxc_container_base_name }}"
template: "download"
state: stopped
backing_store: "dir"
template_options: "{{ lxc_cache_download_template_options }}"
register: cache_download
retries: 3
delay: 10
until: cache_download|success
when:
- lxc_container_backing_store is defined
- lxc_container_backing_store == 'overlayfs'
- name: Create base container to use for LVM-backed copy-on-write containers
lxc_container:
name: "{{ lxc_container_base_name }}"
template: "download"
state: stopped
backing_store: "lvm"
template_options: "{{ lxc_cache_download_template_options }}"
register: cache_download
retries: 3
delay: 10
until: cache_download|success
when:
- lxc_container_backing_store is defined
- lxc_container_backing_store == 'lvm'
- lxc_container_backing_method is defined
- lxc_container_backing_method == 'copy-on-write'
- name: Create base container to use for ZFS-backed containers
lxc_container:
name: "{{ lxc_container_base_name }}"
template: "download"
state: stopped
backing_store: "zfs"
zfs_root: "{{ lxc_container_zfs_root_name }}"
template_options: "{{ lxc_cache_download_template_options }}"
register: cache_download
retries: 3
delay: 10
until: cache_download|success
when:
- lxc_container_backing_store is defined
- lxc_container_backing_store == 'zfs'
- name: Create base container to use for BTRFS-backed containers
lxc_container:
name: "{{ lxc_container_base_name }}"
template: "download"
state: stopped
backing_store: "btrfs"
template_options: "{{ lxc_cache_download_template_options }}"
register: cache_download
retries: 3
delay: 10
until: cache_download|success
when:
- lxc_container_backing_store is defined
- lxc_container_backing_store == 'btrfs'
- include: "{{ item }}"
static: no
with_first_found:
- files:
- "lxc_container_{{ lxc_container_backing_store | default('None') }}.yml"
skip: true

View File

@ -40,31 +40,44 @@
with_items:
- systemd-machined.service.d
- systemd-importd.service.d
when: (deployment_environment_variables | default({})).keys() | length > 0
when:
- (deployment_environment_variables | default({})).keys() | length > 0
- include: "lxc_cache_preparation_systemd_{{ (systemd_version.stdout_lines[0].split()[-1] | int > 219) | ternary('new', 'old') }}.yml"
- name: Remove rootfs archive
file:
path: "/tmp/rootfs.tar.xz"
state: "absent"
- block:
- name: Generate apt keys from LXC host for the container cache
shell: "apt-key exportall"
changed_when: false
register: _apt_exportall
tags:
- skip_ansible_lint
- name: Generate apt keys from LXC host for the container cache
shell: apt-key exportall > /root/repo.keys
changed_when: False
- name: Write exported keys to temporary file
copy:
content: "{{ _apt_exportall.stdout }}"
dest: "/root/repo.keys"
notify:
- Remove generated apt keys from LXC host
when:
- ansible_pkg_mgr == 'apt'
# NOTE(cloudnull): We're using rsync and an if block because we've no means
# to loop over a block. Re-evaluate this task when/if this is
# merged https://github.com/ansible/ansible/issues/13262
- name: Rsyncing files from the LXC host to the container cache
shell: |
if [[ -e "{{ item }}" ]]; then
rsync -av "{{ item }}" "{{ lxc_image_cache_path }}{{ item }}"
else
exit 3
fi
changed_when: _rsync_container_cache.rc == 0
failed_when: _rsync_container_cache.rc not in [0, 3]
register: _rsync_container_cache
args:
executable: "/bin/bash"
with_items: "{{ (lxc_cache_map.copy_from_host | union(lxc_container_cache_files_from_host)) | list }}"
tags:
- skip_ansible_lint
- name: Ensure directories exist for lxc_container_cache_files
file:
@ -101,22 +114,6 @@
poll: 0
register: _lxc_cache_prepare_commands
- name: Ensure that the LXC cache has been prepared
async_status:
jid: "{{ _lxc_cache_prepare_commands.ansible_job_id }}"
register: _lxc_cache_prepare_commands_result
until: _lxc_cache_prepare_commands_result.finished
delay: 10
retries: 60
- name: Adjust sshd configuration in container
lineinfile:
dest: "{{ lxc_image_cache_path }}/etc/ssh/sshd_config"
regexp: "{{ item.regexp }}"
line: "{{ item.line }}"
state: present
with_items: "{{ lxc_cache_sshd_configuration }}"
- name: Obtain the deploy system's ssh public key
set_fact:
lxc_container_ssh_key: "{{ lookup('file', '/root/.ssh/id_rsa.pub') }}"
@ -128,13 +125,15 @@
line: "{{ lxc_container_ssh_key }}"
create: true
- name: Remove generated apt keys from LXC host
file:
path: /root/repo.keys
state: absent
when:
- ansible_pkg_mgr == 'apt'
changed_when: False
# NOTE(cloudnull): Wait until the cache preparation script has completed
# before building the new RootFS
- name: Ensure that the LXC cache has been prepared
async_status:
jid: "{{ _lxc_cache_prepare_commands.ansible_job_id }}"
register: _lxc_cache_prepare_commands_result
until: _lxc_cache_prepare_commands_result.finished
delay: 10
retries: 60
- name: Remove requiretty for sudo on centos
template:
@ -145,3 +144,11 @@
src: sudoers.j2
when:
- ansible_pkg_mgr == 'yum'
- name: Adjust sshd configuration in container
lineinfile:
dest: "{{ lxc_image_cache_path }}/etc/ssh/sshd_config"
regexp: "{{ item.regexp }}"
line: "{{ item.line }}"
state: present
with_items: "{{ lxc_cache_sshd_configuration }}"

View File

@ -56,3 +56,5 @@
- pull_image.rc != 0
- "'failed' in pull_image.stderr | lower"
with_items: "{{ lxc_images }}"
notify:
- Remove rootfs archive

View File

@ -18,7 +18,7 @@
# ==============================================================
# In later versions of SystemD this is automatically done for us
# by the machinectl cli on first run.
- name : Create volume
- name: Create volume
shell: |
if [[ "$(ls -lh /var/lib/machines.raw | awk '{print $5}')" != "{{ lxc_host_machine_volume_size }}.0G" ]]; then
if [[ ! -f "/var/lib/machines.raw" ]]; then
@ -58,6 +58,8 @@
src: var-lib-machines.mount
dest: /etc/systemd/system/var-lib-machines.mount
register: mount_unit
notify:
- Reload systemd units
when:
- machines_create | changed
@ -69,6 +71,9 @@
src: systemd-machined.service.j2
dest: /etc/systemd/system/systemd-machined.service
register: machined_unit
notify:
- Reload systemd units
- Restart machined
when:
- machines_create | changed
@ -80,20 +85,14 @@
src: org.freedesktop.machine1.conf
dest: /etc/dbus-1/system.d/org.freedesktop.machine1.conf
register: machine1_conf
notify:
- Reload systemd units
- Restart dbus
when:
- machines_create | changed
- name: Reload the System daemon
command: "systemctl daemon-reload"
when: >
mount_unit | changed or
machined_unit | changed or
machine1_conf | changed
- name: Restart dbus
command: "systemctl reload dbus.service"
when:
- machine1_conf | changed
# Ensure lxc networks are running as they're supposed to
- meta: flush_handlers
# Ignore the Ansible warning here about using 'mount' via the shell module
# instead of using the mount Ansible module.
@ -107,11 +106,6 @@
tags:
- skip_ansible_lint
- name: Restart machined
command: "systemctl restart systemd-machined.service"
when:
- machined_unit | changed
# Because of this post and its related bug(s) this is adding the container
# volumes the old way. The new way would simply be calling `machinectl`.
# * https://www.mail-archive.com/systemd-devel@lists.freedesktop.org/msg28255.html
@ -145,3 +139,5 @@
src: "/tmp/rootfs.tar.xz"
dest: "/var/lib/machines/{{ lxc_container_base_name }}"
remote_src: True
notify:
- Remove rootfs archive

View File

@ -0,0 +1,26 @@
---
# Copyright 2016, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds the stopped base container from the "download" template on a
# btrfs backing store. The result is registered as cache_download and the
# task is repeated (retries: 3, delay: 10) until it reports success.
- name: Create base container to use for BTRFS-backed containers
  lxc_container:
    name: "{{ lxc_container_base_name }}"
    backing_store: "btrfs"
    template: "download"
    template_options: "{{ lxc_cache_download_template_options }}"
    state: stopped
  register: cache_download
  until: cache_download|success
  retries: 3
  delay: 10

View File

@ -0,0 +1,29 @@
---
# Copyright 2016, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds the stopped base container from the "download" template on an
# lvm backing store. Only runs when lxc_container_backing_method is defined
# and set to 'copy-on-write'. The result is registered as cache_download and
# the task is repeated (retries: 3, delay: 10) until it reports success.
- name: Create base container to use for LVM-backed copy-on-write containers
  lxc_container:
    name: "{{ lxc_container_base_name }}"
    backing_store: "lvm"
    template: "download"
    template_options: "{{ lxc_cache_download_template_options }}"
    state: stopped
  when:
    - lxc_container_backing_method is defined
    - lxc_container_backing_method == 'copy-on-write'
  register: cache_download
  until: cache_download|success
  retries: 3
  delay: 10

View File

@ -0,0 +1,26 @@
---
# Copyright 2016, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds the stopped base container from the "download" template. The base
# used for overlayfs containers is created on the plain "dir" backing store.
# The result is registered as cache_download and the task is repeated
# (retries: 3, delay: 10) until it reports success.
- name: Create base container to use for overlayfs containers
  lxc_container:
    name: "{{ lxc_container_base_name }}"
    backing_store: "dir"
    template: "download"
    template_options: "{{ lxc_cache_download_template_options }}"
    state: stopped
  register: cache_download
  until: cache_download|success
  retries: 3
  delay: 10

View File

@ -0,0 +1,27 @@
---
# Copyright 2016, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds the stopped base container from the "download" template on a zfs
# backing store rooted at lxc_container_zfs_root_name. The result is
# registered as cache_download and the task is repeated (retries: 3,
# delay: 10) until it reports success.
- name: Create base container to use for ZFS-backed containers
  lxc_container:
    name: "{{ lxc_container_base_name }}"
    backing_store: "zfs"
    zfs_root: "{{ lxc_container_zfs_root_name }}"
    template: "download"
    template_options: "{{ lxc_cache_download_template_options }}"
    state: stopped
  register: cache_download
  until: cache_download|success
  retries: 3
  delay: 10

View File

@ -29,6 +29,13 @@
- lxc-bridge
- lxc-interfaces
# NOTE(cloudnull): This task cleans up an old post up script that is no longer
# used. Remove this task in "R".
- name: Remove old post up script
file:
path: "/etc/sysconfig/network-scripts/ifup-post-{{ lxc_net_bridge }}"
state: "absent"
- name: Drop lxc net bridge routes (SUSE)
template:
src: "lxc-net-suse-routes.cfg.j2"
@ -98,6 +105,19 @@
- lxc-net
- lxc-interfaces
- name: Create systemd unit for dnsmasq
template:
src: lxc-dnsmasq-systemd-init.j2
dest: "/etc/systemd/system/lxc-dnsmasq.service"
owner: root
group: root
mode: '0644'
when:
- lxc_net_nat | bool
notify:
- Reload systemd units
- Restart dnsmasq
# Check that the container bridge exists, if not bring it up
- name: Check Container Bridge exists
file:

View File

@ -87,25 +87,32 @@
# https://bugs.archlinux.org/index.php?do=details&action=details.addvote&task_id=47303
# In any case, it's best to also edit the pids controller to ensure that a
# better max value is used in the init.scope
- name: Determine if init.scope cgroup hierarchy exists
stat:
path: "/sys/fs/cgroup/pids/init.scope/pids.max"
register: init_scope_cgroup
when: ansible_service_mgr == 'systemd'
- block:
- name: Get init.scope pids.max value
slurp:
src: "/sys/fs/cgroup/pids/init.scope/pids.max"
register: init_scope_cgroup_pids_max
changed_when: false
- name: Get init.scope pids.max value
command: cat /sys/fs/cgroup/pids/init.scope/pids.max
register: init_scope_cgroup_pids_max
when:
- init_scope_cgroup.stat.exists
- ansible_service_mgr == 'systemd'
- name: Set systemd pids.max in init.scope
shell: "echo {{ lxc_default_tasks_max }} > /sys/fs/cgroup/pids/init.scope/pids.max"
when:
- init_scope_cgroup.stat.exists
- init_scope_cgroup_pids_max.stdout != lxc_default_tasks_max
- ansible_service_mgr == 'systemd'
# NOTE(cloudnull): The "shell" module is being used instead of "copy" with
#                  content as we need direct write access to the sysFS.
# The value registered by the slurp task is base64 encoded, and the decoded
# text normally carries a trailing newline. It is therefore decoded, trimmed
# and compared as a string against the desired value. Comparing the raw
# decoded content with lxc_default_tasks_max can never match, which causes
# pids.max to be rewritten (and reported as changed) on every run.
- name: Set systemd pids.max in init.scope
  shell: "echo {{ lxc_default_tasks_max }} > /sys/fs/cgroup/pids/init.scope/pids.max"
  when:
    - (init_scope_cgroup_pids_max.content | b64decode | trim) != (lxc_default_tasks_max | string)
  tags:
    - skip_ansible_lint
rescue:
- name: Notice regarding setting the init.scope/pids.max
debug:
msg: |
Failed writing to "/sys/fs/cgroup/pids/init.scope/pids.max". While
we prefer setting this option it's not required. See the following
issues on why this is desired and what can be done when the cgroup
hierarchy exists.
* https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/1497420
* https://github.com/lxc/lxc/issues/713
* https://bugs.archlinux.org/index.php?do=details&action=details.addvote&task_id=47303
tags:
- lxc-config

View File

@ -0,0 +1,41 @@
# {{ ansible_managed }}
# Unit definition for the LXC dnsmasq process. Starting and stopping are
# delegated to the /usr/local/bin/lxc-system-manage helper; the iptables
# create/remove hooks are only rendered when lxc_net_manage_iptables
# evaluates to true.
[Unit]
Description=lxc dnsmasq service
After=syslog.target
After=network.target
[Service]
Type=forking
User=root
{% if lxc_net_manage_iptables | bool %}
ExecStartPre=/usr/local/bin/lxc-system-manage iptables-create
{% endif %}
ExecStart=/usr/local/bin/lxc-system-manage dnsmasq-start
ExecStop=-/usr/local/bin/lxc-system-manage dnsmasq-stop
{% if lxc_net_manage_iptables | bool %}
ExecStopPost=-/usr/local/bin/lxc-system-manage iptables-remove
{% endif %}
PIDFile=/run/lxc/dnsmasq.pid
# Give a reasonable amount of time for the server to start up/shut down
TimeoutSec=120
Restart=on-failure
RestartSec=2
# This creates a specific slice which all services will operate from
# The accounting options give us the ability to see resource usage through
# the `systemd-cgtop` command.
# NOTE(review): memory accounting is the only accounting option switched
# off below - confirm this is intentional.
Slice=lxc-dnsmasq.slice
CPUAccounting=true
BlockIOAccounting=true
MemoryAccounting=false
TasksAccounting=true
[Install]
WantedBy=multi-user.target

View File

@ -6,20 +6,6 @@ iface {{ lxc_net_bridge }} inet static
netmask {{ lxc_net_netmask }}
{% if lxc_net_gateway is not none %}
gateway {{ lxc_net_gateway }}
{% endif %}
{% if lxc_net_mtu is defined %}
mtu {{ lxc_net_mtu }}
{% endif %}
{% if lxc_net_nat | bool %}
# dnsmasq start and stop
{% if lxc_net_manage_iptables | bool %}
post-up /usr/local/bin/lxc-system-manage iptables-create
{% endif %}
post-up /usr/local/bin/lxc-system-manage dnsmasq-start || true
{% if lxc_net_manage_iptables | bool %}
post-down /usr/local/bin/lxc-system-manage iptables-remove
{% endif %}
post-down /usr/local/bin/lxc-system-manage dnsmasq-stop
{% endif %}
bridge_fd 0
bridge_maxwait 0

View File

@ -1,7 +0,0 @@
#!/usr/bin/env bash
if [ "{{ item.interface }}" == "{{ lxc_net_bridge }}" ];then
if [ "{{ lxc_net_nat }}" == "True" ];then
/usr/local/bin/lxc-system-manage iptables-create
/usr/local/bin/lxc-system-manage dnsmasq-start || true
fi
fi

View File

@ -99,10 +99,6 @@ lxc_cache_distro_packages:
lxc_cached_network_interfaces:
- src: "lxc-net-redhat-bridge.cfg.j2"
dest: "/etc/sysconfig/network-scripts/ifcfg-{{ lxc_net_bridge }}"
- src: "lxc-net-suseredhat-postup.cfg.j2"
dest: "/etc/sysconfig/network-scripts/ifup-post-{{ lxc_net_bridge }}"
mode: "0755"
interface: "${DEVICE}"
- src: "lxc-net-suseredhat-postdown.cfg.j2"
dest: "/etc/sysconfig/network-scripts/ifdown-post-{{ lxc_net_bridge }}"
mode: "0755"

View File

@ -102,10 +102,6 @@ lxc_cache_distro_packages:
lxc_cached_network_interfaces:
- src: "lxc-net-suse-bridge.cfg.j2"
dest: "/etc/sysconfig/network/ifcfg-{{ lxc_net_bridge }}"
- src: "lxc-net-suseredhat-postup.cfg.j2"
dest: "/etc/sysconfig/network/scripts/ifup-post-{{ lxc_net_bridge }}"
mode: "0755"
interface: "${1}"
- src: "lxc-net-suseredhat-postdown.cfg.j2"
dest: "/etc/sysconfig/network/scripts/ifdown-post-{{ lxc_net_bridge }}"
mode: "0755"