# tripleo-ha-utils/roles/instance-ha/tasks/apply.yml
---
- name: Apply STONITH for compute nodes
  include_role:
    name: stonith-config
  vars:
    stonith_devices: "computes"
  when:
    - stonith_devices in ["all","computes"]
- name: Disable openstack-nova-compute on compute
  service:
    name: openstack-nova-compute
    state: stopped
    enabled: no
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release not in [ 'pike', 'rhos-12' ]
- name: Disable neutron-openvswitch-agent on compute
  service:
    name: neutron-openvswitch-agent
    state: stopped
    enabled: no
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
- name: Disable openstack-ceilometer-compute on compute
  service:
    name: openstack-ceilometer-compute
    state: stopped
    enabled: no
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
- name: Disable libvirtd on compute
  become: yes
  service:
    name: libvirtd
    state: stopped
    enabled: no
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
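# A single pacemaker authkey is generated locally and copied to every overcloud
# node, so pacemaker_remote on the compute nodes can authenticate to the cluster.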
- name: Generate authkey for remote pacemaker
  shell: |
    dd if=/dev/urandom of="/tmp/authkey" bs=4096 count=1
  delegate_to: localhost
- name: Make sure pacemaker config dir exists
  become: yes
  file:
    path: /etc/pacemaker
    state: directory
    mode: 0750
    group: "haclient"
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"
- name: Copy authkey on all the overcloud nodes
  become: yes
  copy:
    src: /tmp/authkey
    dest: /etc/pacemaker/authkey
    mode: 0640
    group: "haclient"
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"
- name: Remove authkey from local dir
  file:
    path: /tmp/authkey
    state: absent
  delegate_to: localhost
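# pacemaker_remote listens on TCP port 3121, so the port is opened in the live
# firewall and made persistent in /etc/sysconfig/iptables.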
- name: Enable iptables traffic for pacemaker_remote
  become: yes
  shell: |
    iptables -I INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"
- name: Make iptables pacemaker_remote rule permanent
  become: yes
  lineinfile:
    path: /etc/sysconfig/iptables
    line: "-A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT"
    insertafter: ":OUTPUT ACCEPT"
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"
- name: Start pacemaker remote service on compute nodes
  become: yes
  service:
    name: pacemaker_remote
    enabled: yes
    state: started
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
- name: Get the name of the stack
  shell: |
    source {{ working_dir }}/stackrc
    openstack stack list -f value -c 'Stack Name'
  register: stack_name
- name: Check if a v3 overcloud's rc file exists
  stat:
    path: "{{ working_dir }}/{{ stack_name.stdout }}rc.v3"
  register: v3_rc_file_stat
- name: Get the contents of the overcloud's rc file v3
  set_fact:
    overcloudrc: "{{ stack_name.stdout }}rc.v3"
  when: v3_rc_file_stat.stat.exists
- name: Get the contents of the overcloud's rc file
  set_fact:
    overcloudrc: "{{ stack_name.stdout }}rc"
  when: not v3_rc_file_stat.stat.exists
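# Pull the overcloud credentials out of the rc file; they are exported later as
# the environment of the pcs block that runs on the first controller.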
- block:
    - name: Get OS_USERNAME from overcloudrc
      shell: |
        grep OS_USERNAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USERNAME=//g'
      register: "OS_USERNAME"
    - name: Get OS_PASSWORD from overcloudrc
      shell: |
        grep OS_PASSWORD {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PASSWORD=//g'
      register: "OS_PASSWORD"
    - name: Get OS_AUTH_URL from overcloudrc
      shell: |
        grep OS_AUTH_URL {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_AUTH_URL=//g'
      register: "OS_AUTH_URL"
    - name: Get OS_PROJECT_NAME or OS_TENANT_NAME from overcloudrc
      shell: |
        grep -E 'OS_PROJECT_NAME|OS_TENANT_NAME' {{ working_dir }}/{{ overcloudrc }} | tail -1 | sed 's/export OS_.*_NAME=//g'
      register: "OS_TENANT_NAME"
    - name: Get OS_USER_DOMAIN_NAME from overcloudrc
      shell: |
        grep OS_USER_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USER_DOMAIN_NAME=//g'
      register: "OS_USER_DOMAIN_NAME"
      when: v3_rc_file_stat.stat.exists
    - name: Get OS_PROJECT_DOMAIN_NAME from overcloudrc
      shell: |
        grep OS_PROJECT_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PROJECT_DOMAIN_NAME=//g'
      register: "OS_PROJECT_DOMAIN_NAME"
      when: v3_rc_file_stat.stat.exists
- name: Define variable for pcs additional options for overcloud's rc file v3
  set_fact:
    pcs_v3_rc_file_opts: ""
- name: Define variable for pcs additional options for no_shared_storage
  set_fact:
    pcs_NovaEvacuate_no_shared_storage_opts: ""
    pcs_fence_compute_no_shared_storage_opts: ""
- name: Set pcs additional options for overcloud's rc file v3
  set_fact:
    pcs_v3_rc_file_opts: "project_domain=$OS_PROJECT_DOMAIN_NAME user_domain=$OS_USER_DOMAIN_NAME"
  when: v3_rc_file_stat.stat.exists
- name: Set pcs additional options for no_shared_storage
  set_fact:
    pcs_NovaEvacuate_no_shared_storage_opts: "no_shared_storage=1"
    pcs_fence_compute_no_shared_storage_opts: "no-shared-storage=True"
  when: not instance_ha_shared_storage|bool
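# All pcs commands below run on the first controller; the overcloud credentials
# gathered above are exported so that the NovaEvacuate and fence_compute agents
# can be configured with them.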
- block:
    - name: Create resource nova-evacuate
      shell: |
        pcs resource create nova-evacuate ocf:openstack:NovaEvacuate auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME {{ pcs_v3_rc_file_opts }} {{ pcs_NovaEvacuate_no_shared_storage_opts }} --force
    - name: Create pacemaker constraint to start nova-evacuate only on non compute nodes
      shell: |
        pcs constraint location nova-evacuate rule resource-discovery=never score=-INFINITY osprole eq compute
    - name: Create pacemaker constraints to start VIP resources before nova-evacuate
      shell: |
        for i in $(pcs status | grep IP | awk '{ print $1 }')
        do pcs constraint order start $i then nova-evacuate
        done
    - name: Create pacemaker constraints to start openstack services before nova-evacuate
      shell: "pcs constraint order start {{ item }} then nova-evacuate require-all=false"
      with_items:
        - openstack-glance-api-clone
        - neutron-metadata-agent-clone
        - openstack-nova-conductor-clone
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
    - name: Disable keystone resource
      shell: "pcs resource disable openstack-keystone --wait=900"
      when: release in [ 'liberty', 'rhos-8' ]
    # Keystone resource was replaced by openstack-core resource in RHOS9
    - name: Disable openstack-core resource
      shell: "pcs resource disable openstack-core --wait=900"
      when: release in [ 'mitaka', 'rhos-9' ]
    - name: Set controller pacemaker property on controllers
      shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=controller"
      with_items: "{{ groups['controller'] }}"
    - name: Get stonith devices
      shell: "pcs stonith | awk '{print $1}' | tr '\n' ' '"
      register: stonithdevs
    - name: Setup stonith devices
      shell: |
        for i in $(cibadmin -Q --xpath //primitive --node-path | awk -F "id='" '{print $2}' | awk -F "'" '{print $1}' | uniq); do
          found=0
          if [ -n "{{ stonithdevs.stdout }}" ]; then
            for x in {{ stonithdevs.stdout }}; do
              if [ "$x" == "$i" ]; then
                found=1
              fi
            done
          fi
          if [ $found = 0 ]; then
            pcs constraint location $i rule resource-discovery=exclusive score=0 osprole eq controller
          fi
        done
      when: release not in [ 'pike', 'rhos-12' ]
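    # nova-compute-checkevacuate (ocf:openstack:nova-compute-wait) is intended to
    # hold back the start of openstack-nova-compute on a recovering node until any
    # pending evacuations for it have finished.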
    - name: Create compute pacemaker resources and constraints
      shell: |
        pcs resource create nova-compute-checkevacuate ocf:openstack:nova-compute-wait auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain={{ overcloud_domain }} op start timeout=300 --clone interleave=true --disabled --force
        pcs constraint location nova-compute-checkevacuate-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs resource create nova-compute systemd:openstack-nova-compute op start timeout=60s --clone interleave=true --disabled --force
        pcs constraint location nova-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs constraint order start nova-compute-checkevacuate-clone then nova-compute-clone require-all=true
        pcs constraint order start nova-compute-clone then nova-evacuate require-all=false
      when: release not in [ 'pike', 'rhos-12' ]
    - name: Create compute pacemaker resources and constraints (neutron-openvswitch-agent, libvirtd, ceilometer)
      shell: |
        pcs resource create neutron-openvswitch-agent-compute systemd:neutron-openvswitch-agent --clone interleave=true --disabled --force
        pcs constraint location neutron-openvswitch-agent-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs resource create libvirtd-compute systemd:libvirtd --clone interleave=true --disabled --force
        pcs constraint location libvirtd-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs constraint order start neutron-openvswitch-agent-compute-clone then libvirtd-compute-clone
        pcs constraint colocation add libvirtd-compute-clone with neutron-openvswitch-agent-compute-clone
        pcs resource create ceilometer-compute systemd:openstack-ceilometer-compute --clone interleave=true --disabled --force
        pcs constraint location ceilometer-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
        pcs constraint order start libvirtd-compute-clone then ceilometer-compute-clone
        pcs constraint colocation add ceilometer-compute-clone with libvirtd-compute-clone
        pcs constraint order start libvirtd-compute-clone then nova-compute-clone
        pcs constraint colocation add nova-compute-clone with libvirtd-compute-clone
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
    - name: Create pacemaker constraint for neutron-server, nova-conductor and ceilometer-notification
      shell: |
        pcs constraint order start neutron-server-clone then neutron-openvswitch-agent-compute-clone require-all=false
        pcs constraint order start openstack-ceilometer-notification-clone then ceilometer-compute-clone require-all=false
        pcs constraint order start openstack-nova-conductor-clone then nova-compute-checkevacuate-clone require-all=false
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
    - name: Set requires to fencing as default for all resources (Pike/RHOS-12)
      shell: "pcs resource defaults requires=fencing"
      when: release in [ 'pike', 'rhos-12' ]
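    # fence-nova is a fence_compute stonith device used with record-only=1: it does
    # not power off the host itself, it records the fencing so that Nova knows the
    # host is down and its instances can be evacuated.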
    - name: Create fence-nova pacemaker resource
      shell: "pcs stonith create fence-nova fence_compute auth-url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant-name=$OS_TENANT_NAME domain={{ overcloud_domain }} record-only=1 {{ pcs_fence_compute_no_shared_storage_opts }} --force"
      when: release not in [ 'pike', 'rhos-12' ]
    - name: Create fence-nova pacemaker resource (Pike/RHOS-12)
      shell: "pcs stonith create fence-nova fence_compute auth-url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant-name=$OS_TENANT_NAME domain={{ overcloud_domain }} record-only=1 {{ pcs_fence_compute_no_shared_storage_opts }} meta provides=unfencing --force"
      when: release in [ 'pike', 'rhos-12' ]
    - name: Create pacemaker constraint for fence-nova to fix it on controller node and set resource-discovery never
      shell: "pcs constraint location fence-nova rule resource-discovery=never score=0 osprole eq controller"
    - name: Create pacemaker constraint for fence-nova to start after galera
      shell: "pcs constraint order promote galera-master then fence-nova require-all=false"
      when: release not in [ 'pike', 'rhos-12' ]
    - name: Create nova-compute order constraint on fence-nova
      shell: "pcs constraint order start fence-nova then nova-compute-clone"
      when: release not in [ 'pike', 'rhos-12' ]
    - name: Set cluster recheck interval to 1 minute
      shell: "pcs property set cluster-recheck-interval=1min"
    - name: Create pacemaker remote resource on compute nodes
      shell: "pcs resource create {{ hostvars[item]['ansible_hostname'] }} ocf:pacemaker:remote reconnect_interval=240 op monitor interval=20"
      with_items: "{{ groups['compute'] }}"
    - name: Set osprole for compute nodes
      shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=compute"
      with_items: "{{ groups['compute'] }}"
    - name: Add STONITH level definitions for compute nodes
      shell: |
        compute_stonith_name=$(cibadmin --query --xpath "//primitive[@class='stonith']/instance_attributes/nvpair[@value='{{ item }}']" | sed 's/.*id="\(.*\)-instance_attributes-pcmk_host_list".*/\1/g')
        pcs stonith level add 1 {{ item }} $compute_stonith_name,fence-nova
      with_items: "{{ groups['compute'] }}"
    - name: Enable keystone resource
      shell: "pcs resource enable openstack-keystone"
      when: release in [ 'liberty', 'rhos-8' ]
    - name: Enable openstack-core resource
      shell: "pcs resource enable openstack-core"
      when: release in [ 'mitaka', 'rhos-9' ]
    - name: Wait for httpd service to be started
      shell: "systemctl show httpd --property=ActiveState"
      register: httpd_status_result
      until: httpd_status_result.stdout.find('inactive') == -1 and httpd_status_result.stdout.find('activating') == -1
      retries: 30
      delay: 10
      when: release not in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
    - name: Enable compute nodes resources (nova)
      shell: "pcs resource enable {{ item }}"
      with_items:
        - nova-compute-checkevacuate
        - nova-compute
      when: release not in [ 'pike', 'rhos-12' ]
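    # With requires=fencing as the resource default on Pike/RHOS-12, this dummy
    # resource additionally requires unfencing, so a compute node must be cleanly
    # (un)fenced before anything is allowed to start on it again.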
    - name: Create compute unfence resource to override default resource requires (Pike/RHOS-12)
      shell: |
        pcs resource create compute-unfence-trigger ocf:pacemaker:Dummy op start requires="unfencing" --clone --disabled
        pcs constraint location compute-unfence-trigger-clone rule resource-discovery=never score=-INFINITY osprole ne compute
        pcs resource enable compute-unfence-trigger
      when: release in [ 'pike', 'rhos-12' ]
    - name: Enable compute nodes resources (others)
      shell: "pcs resource enable {{ item }}"
      with_items:
        - neutron-openvswitch-agent-compute
        - libvirtd-compute
        - ceilometer-compute
      when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
  environment:
    OS_USERNAME: "{{ OS_USERNAME.stdout }}"
    OS_PASSWORD: "{{ OS_PASSWORD.stdout }}"
    OS_AUTH_URL: "{{ OS_AUTH_URL.stdout }}"
    OS_TENANT_NAME: "{{ OS_TENANT_NAME.stdout }}"
    OS_USER_DOMAIN_NAME: "{{ OS_USER_DOMAIN_NAME.stdout | default('') }}"
    OS_PROJECT_DOMAIN_NAME: "{{ OS_PROJECT_DOMAIN_NAME.stdout | default('') }}"
  become: yes
  delegate_to: "{{ groups.controller[0] }}"
- name: Cleanup (if any) failed resources
  shell: |
    for resource in $(pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq)
    do
      pcs resource cleanup $resource
    done
  become: yes
  delegate_to: "{{ groups.controller[0] }}"
- name: Wait for (if any) failed resources to recover
  shell: pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq
  register: failed_resources
  until: failed_resources.stdout == ''
  retries: 10
  delay: 10
  become: yes
  delegate_to: "{{ groups.controller[0] }}"