From a6389da22d43fa61d4db16b676b23c3a4c468dfd Mon Sep 17 00:00:00 2001 From: Michele Baldessari Date: Tue, 5 Jun 2018 14:19:24 +0000 Subject: [PATCH] Introduce restart_bundle containers to detect config changes and restart pacemaker resources During the containerization work we regressed on the restart of pacemaker resources when a config change for the service was detected. In baremetal we used to do the following: 1) If a puppet config change was detected we'd touch a file with the service name under /var/lib/tripleo/pacemaker-restarts/ 2) A post deployment bash script (extraconfig/tasks/pacemaker_resource_restart.sh) would test for the service file's existence and restart the pcs service via 'pcs resource restart --wait=600 service' on the bootstrap node. With this patchset we make use of paunch's ability to detect if a config hash change happened to respawn a temporary container (called <service>_restart_bundle) which will simply always restart the pacemaker service from the bootstrap node whenever invoked, but only if the pcmk resource already exists. For this reason we add config_volume and bind mount it inside the container, so that the TRIPLEO_CONFIG_HASH env variable gets generated for these *_restart_bundle containers. 
We tested this change as follows: A) Deployed an HA overcloud with this change and observed that pcmk resources were not restarted needlessly during initial deploy B) Reran the exact same overcloud deploy with no changes, observed that no spurious restarts would take place C) Added an env file to trigger a config change of haproxy[1], redeployed and observed that it restarted haproxy only: Jun 06 16:22:37 overcloud-controller-0 dockerd-current[15272]: haproxy-bundle restart invoked D) Added a trigger [2] for mysql config change, redeployed and observed restart: Jun 06 16:40:52 overcloud-controller-0 dockerd-current[15272]: galera-bundle restart invoked E) Added a trigger [3] for a rabbitmq config change, redeployed and observed restart: Jun 06 17:03:41 overcloud-controller-0 dockerd-current[15272]: rabbitmq-bundle restart invoked F) Added a trigger [4] for a redis config change, redeployed and observed restart: Jun 07 08:42:54 overcloud-controller-0 dockerd-current[15272]: redis-bundle restart invoked G) Reran a deploy with no changes and observed that no spurious restarts were triggered [1] haproxy config change trigger: parameter_defaults: ExtraConfig: tripleo::haproxy::haproxy_globals_override: 'maxconn': 1111 [2] mysql config change trigger: parameter_defaults: ExtraConfig: mysql_max_connections: 1111 [3] rabbitmq config change trigger (default partition handling is 'ignore'): parameter_defaults: ExtraConfig: rabbitmq_config_variables: cluster_partition_handling: 'pause_minority' queue_master_locator: '<<"min-masters">>' loopback_users: '[]' [4] redis config change trigger: parameter_defaults: ExtraConfig: redis::tcp_backlog: 666 redis::params::tcp_backlog: 666 Change-Id: I62870c055097569ceab2ff67cf0fe63122277c5b Co-Authored-By: Damien Ciabrini Closes-Bug: #1775196 --- docker/services/pacemaker/cinder-backup.yaml | 27 +++++++++++++++++ docker/services/pacemaker/cinder-volume.yaml | 29 ++++++++++++++++++- docker/services/pacemaker/database/mysql.yaml | 27 
+++++++++++++++++ docker/services/pacemaker/database/redis.yaml | 27 +++++++++++++++++ docker/services/pacemaker/haproxy.yaml | 27 +++++++++++++++++ docker/services/pacemaker/manila-share.yaml | 29 ++++++++++++++++++- .../services/pacemaker/notify-rabbitmq.yaml | 29 ++++++++++++++++++- docker/services/pacemaker/ovn-dbs.yaml | 27 +++++++++++++++++ docker/services/pacemaker/rabbitmq.yaml | 29 ++++++++++++++++++- docker/services/pacemaker/rpc-rabbitmq.yaml | 29 ++++++++++++++++++- 10 files changed, 275 insertions(+), 5 deletions(-) diff --git a/docker/services/pacemaker/cinder-backup.yaml b/docker/services/pacemaker/cinder-backup.yaml index 3e7d4ed976..28c0f5c74b 100644 --- a/docker/services/pacemaker/cinder-backup.yaml +++ b/docker/services/pacemaker/cinder-backup.yaml @@ -52,6 +52,11 @@ parameters: default: false description: Whether to run config management (e.g. Puppet) in debug mode. type: boolean + PcmkConfigRestartTimeout: + default: 600 + description: Time in seconds to wait for a pcmk resource to restart when + a config change is detected and the resource is being restarted + type: number conditions: puppet_debug_enabled: {get_param: ConfigDebug} @@ -170,6 +175,28 @@ outputs: - /var/log/containers/cinder:/var/log/cinder command: ['/bin/bash', '-c', 'chown -R cinder:cinder /var/log/cinder'] step_5: + cinder_backup_restart_bundle: + start_order: 0 + config_volume: cinder + detach: false + net: host + user: root + command: + - '/usr/bin/bootstrap_host_exec' + - 'cinder_backup' + - str_replace: + template: + 'if /usr/sbin/pcs resource show openstack-cinder-backup; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT openstack-cinder-backup; echo "openstack-cinder-backup restart invoked"; fi' + params: + PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout} + image: {get_param: DockerCinderBackupImage} + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro + - /dev/shm:/dev/shm:rw + - 
/var/lib/config-data/puppet-generated/cinder/:/var/lib/kolla/config_files/src:ro cinder_backup_init_bundle: start_order: 1 detach: false diff --git a/docker/services/pacemaker/cinder-volume.yaml b/docker/services/pacemaker/cinder-volume.yaml index 02e7d57334..e141e3cd9b 100644 --- a/docker/services/pacemaker/cinder-volume.yaml +++ b/docker/services/pacemaker/cinder-volume.yaml @@ -49,6 +49,11 @@ parameters: default: false description: Whether to run config management (e.g. Puppet) in debug mode. type: boolean + PcmkConfigRestartTimeout: + default: 600 + description: Time in seconds to wait for a pcmk resource to restart when + a config change is detected and the resource is being restarted + type: number conditions: puppet_debug_enabled: {get_param: ConfigDebug} @@ -164,8 +169,30 @@ outputs: - /var/log/containers/cinder:/var/log/cinder command: ['/bin/bash', '-c', 'chown -R cinder:cinder /var/log/cinder'] step_5: - cinder_volume_init_bundle: + cinder_volume_restart_bundle: start_order: 0 + config_volume: cinder + detach: false + net: host + user: root + command: + - '/usr/bin/bootstrap_host_exec' + - 'cinder_volume' + - str_replace: + template: + 'if /usr/sbin/pcs resource show openstack-cinder-volume; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT openstack-cinder-volume; echo "openstack-cinder-volume restart invoked"; fi' + params: + PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout} + image: {get_param: DockerCinderVolumeImage} + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro + - /dev/shm:/dev/shm:rw + - /var/lib/config-data/puppet-generated/cinder/:/var/lib/kolla/config_files/src:ro + cinder_volume_init_bundle: + start_order: 1 detach: false net: host user: root diff --git a/docker/services/pacemaker/database/mysql.yaml b/docker/services/pacemaker/database/mysql.yaml index 109c31b114..1049ed58fe 100644 --- a/docker/services/pacemaker/database/mysql.yaml +++ 
b/docker/services/pacemaker/database/mysql.yaml @@ -61,6 +61,11 @@ parameters: description: > Setting this to a unique value will re-run any deployment tasks which perform configuration on a Heat stack-update. + PcmkConfigRestartTimeout: + default: 600 + description: Time in seconds to wait for a pcmk resource to restart when + a config change is detected and the resource is being restarted + type: number resources: @@ -247,6 +252,28 @@ outputs: - /usr/bin:/usr/bin:ro - /var/run/docker.sock:/var/run/docker.sock:rw step_2: + mysql_restart_bundle: + start_order: 0 + config_volume: mysql + detach: false + net: host + user: root + command: + - '/usr/bin/bootstrap_host_exec' + - 'mysql' + - str_replace: + template: + 'if /usr/sbin/pcs resource show galera-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT galera-bundle; echo "galera-bundle restart invoked"; fi' + params: + PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout} + image: {get_param: DockerMysqlImage} + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro + - /dev/shm:/dev/shm:rw + - /var/lib/config-data/puppet-generated/mysql/:/var/lib/kolla/config_files/src:ro mysql_init_bundle: start_order: 1 detach: false diff --git a/docker/services/pacemaker/database/redis.yaml b/docker/services/pacemaker/database/redis.yaml index b4a5b4b5a6..c4516beb3a 100644 --- a/docker/services/pacemaker/database/redis.yaml +++ b/docker/services/pacemaker/database/redis.yaml @@ -47,6 +47,11 @@ parameters: default: false description: Enable IPv6 in Redis type: boolean + PcmkConfigRestartTimeout: + default: 600 + description: Time in seconds to wait for a pcmk resource to restart when + a config change is detected and the resource is being restarted + type: number conditions: puppet_debug_enabled: {get_param: ConfigDebug} @@ -201,6 +206,28 @@ outputs: - /var/run/docker.sock:/var/run/docker.sock:rw step_2: map_merge: + - redis_restart_bundle: + 
start_order: 1 + config_volume: redis + detach: false + net: host + user: root + command: + - '/usr/bin/bootstrap_host_exec' + - 'redis' + - str_replace: + template: + 'if /usr/sbin/pcs resource show redis-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT redis-bundle; echo "redis-bundle restart invoked"; fi' + params: + PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout} + image: {get_param: DockerRedisConfigImage} + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro + - /dev/shm:/dev/shm:rw + - /var/lib/config-data/puppet-generated/redis/:/var/lib/kolla/config_files/src:ro - redis_init_bundle: start_order: 2 detach: false diff --git a/docker/services/pacemaker/haproxy.yaml b/docker/services/pacemaker/haproxy.yaml index 0516efbee3..21826a4a54 100644 --- a/docker/services/pacemaker/haproxy.yaml +++ b/docker/services/pacemaker/haproxy.yaml @@ -76,6 +76,11 @@ parameters: default: false description: Whether to run config management (e.g. Puppet) in debug mode. 
type: boolean + PcmkConfigRestartTimeout: + default: 600 + description: Time in seconds to wait for a pcmk resource to restart when + a config change is detected and the resource is being restarted + type: number conditions: puppet_debug_enabled: {get_param: ConfigDebug} @@ -215,6 +220,28 @@ outputs: - /var/run/docker.sock:/var/run/docker.sock:rw image: {get_param: DockerHAProxyImage} step_2: + haproxy_restart_bundle: + start_order: 2 + detach: false + net: host + user: root + config_volume: haproxy + command: + - '/usr/bin/bootstrap_host_exec' + - 'haproxy' + - str_replace: + template: + 'if /usr/sbin/pcs resource show haproxy-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT haproxy-bundle; echo "haproxy-bundle restart invoked"; fi' + params: + PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout} + image: {get_param: DockerHAProxyImage} + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro + - /dev/shm:/dev/shm:rw + - /var/lib/config-data/puppet-generated/haproxy/:/var/lib/kolla/config_files/src:ro haproxy_init_bundle: start_order: 3 detach: false diff --git a/docker/services/pacemaker/manila-share.yaml b/docker/services/pacemaker/manila-share.yaml index 98cc31eed7..ff498444eb 100644 --- a/docker/services/pacemaker/manila-share.yaml +++ b/docker/services/pacemaker/manila-share.yaml @@ -40,6 +40,11 @@ parameters: default: false description: Whether to run config management (e.g. Puppet) in debug mode. 
type: boolean + PcmkConfigRestartTimeout: + default: 600 + description: Time in seconds to wait for a pcmk resource to restart when + a config change is detected and the resource is being restarted + type: number conditions: puppet_debug_enabled: {get_param: ConfigDebug} @@ -140,8 +145,30 @@ outputs: - /var/log/containers/manila:/var/log/manila command: ['/bin/bash', '-c', 'chown -R manila:manila /var/log/manila'] step_5: - manila_share_init_bundle: + manila_share_restart_bundle: start_order: 0 + config_volume: manila + detach: false + net: host + user: root + command: + - '/usr/bin/bootstrap_host_exec' + - 'manila_share' + - str_replace: + template: + 'if /usr/sbin/pcs resource show openstack-manila-share; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT openstack-manila-share; echo "openstack-manila-share restart invoked"; fi' + params: + PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout} + image: {get_param: DockerManilaShareImage} + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro + - /dev/shm:/dev/shm:rw + - /var/lib/config-data/puppet-generated/manila/:/var/lib/kolla/config_files/src:ro + manila_share_init_bundle: + start_order: 1 detach: false net: host user: root diff --git a/docker/services/pacemaker/notify-rabbitmq.yaml b/docker/services/pacemaker/notify-rabbitmq.yaml index 62702adc79..9420f866bc 100644 --- a/docker/services/pacemaker/notify-rabbitmq.yaml +++ b/docker/services/pacemaker/notify-rabbitmq.yaml @@ -44,6 +44,11 @@ parameters: default: false description: Whether to run config management (e.g. Puppet) in debug mode. 
type: boolean + PcmkConfigRestartTimeout: + default: 600 + description: Time in seconds to wait for a pcmk resource to restart when + a config change is detected and the resource is being restarted + type: number conditions: puppet_debug_enabled: {get_param: ConfigDebug} @@ -186,8 +191,30 @@ outputs: - /usr/bin:/usr/bin:ro - /var/run/docker.sock:/var/run/docker.sock:rw step_2: - rabbitmq_init_bundle: + rabbitmq_restart_bundle: start_order: 0 + config_volume: rabbitmq + detach: false + net: host + user: root + command: + - '/usr/bin/bootstrap_host_exec' + - 'rabbitmq' + - str_replace: + template: + 'if /usr/sbin/pcs resource show rabbitmq-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT rabbitmq-bundle; echo "rabbitmq-bundle restart invoked"; fi' + params: + PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout} + image: {get_param: DockerRabbitmqImage} + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro + - /dev/shm:/dev/shm:rw + - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro + rabbitmq_init_bundle: + start_order: 1 detach: false net: host user: root diff --git a/docker/services/pacemaker/ovn-dbs.yaml b/docker/services/pacemaker/ovn-dbs.yaml index 4dfab0bcb8..0c5de0cd34 100644 --- a/docker/services/pacemaker/ovn-dbs.yaml +++ b/docker/services/pacemaker/ovn-dbs.yaml @@ -48,6 +48,11 @@ parameters: default: false description: Whether to run config management (e.g. Puppet) in debug mode. 
type: boolean + PcmkConfigRestartTimeout: + default: 600 + description: Time in seconds to wait for a pcmk resource to restart when + a config change is detected and the resource is being restarted + type: number conditions: puppet_debug_enabled: {get_param: ConfigDebug} @@ -105,6 +110,28 @@ outputs: docker_config_scripts: {get_attr: [ContainersCommon, docker_config_scripts]} docker_config: step_3: + ovn_dbs_restart_bundle: + start_order: 0 + config_volume: ovn_dbs + detach: false + net: host + user: root + command: + - '/usr/bin/bootstrap_host_exec' + - 'ovn_dbs' + - str_replace: + template: + 'if /usr/sbin/pcs resource show ovn-dbs-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT ovn-dbs-bundle; echo "ovn-dbs-bundle restart invoked"; fi' + params: + PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout} + image: {get_param: DockerOvnDbsConfigImage} + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro + - /dev/shm:/dev/shm:rw + - /var/lib/config-data/puppet-generated/ovn_dbs/:/var/lib/kolla/config_files/src:ro ovn_dbs_init_bundle: start_order: 1 detach: false diff --git a/docker/services/pacemaker/rabbitmq.yaml b/docker/services/pacemaker/rabbitmq.yaml index 097c304bcd..02902bafa6 100644 --- a/docker/services/pacemaker/rabbitmq.yaml +++ b/docker/services/pacemaker/rabbitmq.yaml @@ -44,6 +44,11 @@ parameters: default: false description: Whether to run config management (e.g. Puppet) in debug mode. 
type: boolean + PcmkConfigRestartTimeout: + default: 600 + description: Time in seconds to wait for a pcmk resource to restart when + a config change is detected and the resource is being restarted + type: number conditions: puppet_debug_enabled: {get_param: ConfigDebug} @@ -185,8 +190,30 @@ outputs: - /usr/bin:/usr/bin:ro - /var/run/docker.sock:/var/run/docker.sock:rw step_2: - rabbitmq_init_bundle: + rabbitmq_restart_bundle: start_order: 0 + config_volume: rabbitmq + detach: false + net: host + user: root + command: + - '/usr/bin/bootstrap_host_exec' + - 'rabbitmq' + - str_replace: + template: + 'if /usr/sbin/pcs resource show rabbitmq-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT rabbitmq-bundle; echo "rabbitmq-bundle restart invoked"; fi' + params: + PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout} + image: {get_param: DockerRabbitmqImage} + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro + - /dev/shm:/dev/shm:rw + - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro + rabbitmq_init_bundle: + start_order: 1 detach: false net: host user: root diff --git a/docker/services/pacemaker/rpc-rabbitmq.yaml b/docker/services/pacemaker/rpc-rabbitmq.yaml index 01ce584ee3..9900e6d404 100644 --- a/docker/services/pacemaker/rpc-rabbitmq.yaml +++ b/docker/services/pacemaker/rpc-rabbitmq.yaml @@ -44,6 +44,11 @@ parameters: default: false description: Whether to run config management (e.g. Puppet) in debug mode. 
type: boolean + PcmkConfigRestartTimeout: + default: 600 + description: Time in seconds to wait for a pcmk resource to restart when + a config change is detected and the resource is being restarted + type: number conditions: puppet_debug_enabled: {get_param: ConfigDebug} @@ -186,8 +191,30 @@ outputs: - /usr/bin:/usr/bin:ro - /var/run/docker.sock:/var/run/docker.sock:rw step_2: - rabbitmq_init_bundle: + rabbitmq_restart_bundle: start_order: 0 + config_volume: rabbitmq + detach: false + net: host + user: root + command: + - '/usr/bin/bootstrap_host_exec' + - 'rabbitmq' + - str_replace: + template: + 'if /usr/sbin/pcs resource show rabbitmq-bundle; then /usr/sbin/pcs resource restart --wait=PCMKTIMEOUT rabbitmq-bundle; echo "rabbitmq-bundle restart invoked"; fi' + params: + PCMKTIMEOUT: {get_param: PcmkConfigRestartTimeout} + image: {get_param: DockerRabbitmqImage} + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /etc/corosync/corosync.conf:/etc/corosync/corosync.conf:ro + - /dev/shm:/dev/shm:rw + - /var/lib/config-data/puppet-generated/rabbitmq/:/var/lib/kolla/config_files/src:ro + rabbitmq_init_bundle: + start_order: 1 detach: false net: host user: root