Fix race during major-upgrade-pacemaker step

Currently when we call the major-upgrade step we do the following:
"""
...
if [[ -n $(is_bootstrap_node) ]]; then
    check_clean_cluster
fi
...
if [[ -n $(is_bootstrap_node) ]]; then
    migrate_full_to_ng_ha
fi
...
for service in $(services_to_migrate); do
    manage_systemd_service stop "${service%%-clone}"
    ...
done
"""

The problem with the above code is that it is open to the following race
condition:
1. The code runs first on a non-bootstrap controller node, so we start
stopping a bunch of services
2. Pacemaker notices that those services are down and marks them as
stopped
3. The code then runs on the bootstrap node (controller-0), where the
check_clean_cluster function fails and exits
4. Eventually the script on the non-bootstrap controller node also times
out and exits, because the cluster never shut down (the shutdown never
actually started, since we failed at step 3)

Let's make sure we first call the HA NG migration as a separate heat
step, and only afterwards start shutting down the systemd services on
all nodes.
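
Schematically, the new ordering looks like this (an illustrative sketch
of the resulting split, not the literal diff below):
"""
# heat step 1: cluster checks and HA NG migration, bootstrap node only
if [[ -n $(is_bootstrap_node) ]]; then
    check_clean_cluster
fi
...
if [[ -n $(is_bootstrap_node) ]]; then
    migrate_full_to_ng_ha
fi

# heat step 2: only after step 1 has completed on every node do we
# stop the systemd services
for service in $(services_to_migrate); do
    manage_systemd_service stop "${service%%-clone}"
    ...
done
"""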

We also need to move the STONITH_STATE variable into a file, because it
is used across two different scripts (1 and 2) and we need to persist
that state between them.
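
Concretely, the step 1 script now records the state in a flag file and
the step 2 script consumes it (taken from the diffs below):
"""
# step 1 script, before disabling stonith
STONITH_STATE=$(pcs property show stonith-enabled | grep "stonith-enabled" | awk '{ print $2 }')
rm -f /var/tmp/stonith-true
if [ $STONITH_STATE == "true" ]; then
    touch /var/tmp/stonith-true
fi
pcs property set stonith-enabled=false

# step 2 script, once the upgrade is done
if [ -f /var/tmp/stonith-true ]; then
    pcs -f /var/lib/pacemaker/cib/cib.xml property set stonith-enabled=true
fi
rm -f /var/tmp/stonith-true
"""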

Co-Authored-By: Athlan-Guyot Sofer <sathlang@redhat.com>

Closes-Bug: #1640407
Change-Id: Ifb9b9e633fcc77604cca2590071656f4b2275c60
commit dde12b075f (parent 465324cb6a)
Michele Baldessari, 2016-11-09 09:05:08 +01:00
8 changed files with 321 additions and 269 deletions

extraconfig/tasks/major_upgrade_check.sh

@@ -18,14 +18,8 @@ check_pcsd()
fi
}
check_disk_for_mysql_dump()
mysql_need_update()
{
# Where to back up the current database if mysql needs to be upgraded
MYSQL_BACKUP_DIR=/var/tmp/mysql_upgrade_osp
MYSQL_TEMP_UPGRADE_BACKUP_DIR=/var/lib/mysql-temp-upgrade-backup
# Spare disk ratio for extra safety
MYSQL_BACKUP_SIZE_RATIO=1.2
# Shall we upgrade mysql data directory during the stack upgrade?
if [ "$mariadb_do_major_upgrade" = "auto" ]; then
ret=$(is_mysql_upgrade_needed)
@@ -40,6 +34,17 @@ check_disk_for_mysql_dump()
else
DO_MYSQL_UPGRADE=1
fi
}
check_disk_for_mysql_dump()
{
# Where to back up the current database if mysql needs to be upgraded
MYSQL_BACKUP_DIR=/var/tmp/mysql_upgrade_osp
MYSQL_TEMP_UPGRADE_BACKUP_DIR=/var/lib/mysql-temp-upgrade-backup
# Spare disk ratio for extra safety
MYSQL_BACKUP_SIZE_RATIO=1.2
mysql_need_update
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then

extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh

@@ -2,8 +2,6 @@
set -eu
cluster_sync_timeout=1800
check_cluster
check_pcsd
if [[ -n $(is_bootstrap_node) ]]; then
@@ -19,6 +17,11 @@ check_disk_for_mysql_dump
# at the end of this script
if [[ -n $(is_bootstrap_node) ]]; then
STONITH_STATE=$(pcs property show stonith-enabled | grep "stonith-enabled" | awk '{ print $2 }')
# We create this empty file if stonith was set to true so we can re-enable stonith in step 2
rm -f /var/tmp/stonith-true
if [ $STONITH_STATE == "true" ]; then
touch /var/tmp/stonith-true
fi
pcs property set stonith-enabled=false
fi
@@ -31,178 +34,3 @@ if [[ -n $(is_bootstrap_node) ]]; then
rabbitmq_mitaka_newton_upgrade
fi
# After migrating the cluster to HA-NG the services not under pacemaker's control
# are still up and running. We need to stop them explicitly, otherwise during the yum
# upgrade the rpm %post sections will try to do a systemctl try-restart <service>, which
# is going to take a long time because rabbit is down. By having the service stopped
# systemctl try-restart is a noop
for service in $(services_to_migrate); do
manage_systemd_service stop "${service%%-clone}"
# So the reason for not reusing check_resource_systemd is that
# I have observed systemctl is-active returning unknown with at least
# one service that was stopped (See LP 1627254)
timeout=600
tstart=$(date +%s)
tend=$(( $tstart + $timeout ))
check_interval=3
while (( $(date +%s) < $tend )); do
if [[ "$(systemctl is-active ${service%%-clone})" = "active" ]]; then
echo "$service still active, sleeping $check_interval seconds."
sleep $check_interval
else
# we do not care if it is inactive, unknown or failed as long as it is
# not running
break
fi
done
done
# In case the mysql package is updated, the database on disk must be
# upgraded as well. This typically needs to happen during major
# version upgrades (e.g. 5.5 -> 5.6, 5.5 -> 10.1...)
#
# Because in-place upgrades are not supported across 2+ major versions
# (e.g. 5.5 -> 10.1), we rely on logical upgrades via a dump/restore cycle
# https://bugzilla.redhat.com/show_bug.cgi?id=1341968
#
# The default is to determine automatically if upgrade is needed based
# on mysql package versioning, but this can be overridden manually
# to support specific upgrade scenarios
if [[ -n $(is_bootstrap_node) ]]; then
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
mysqldump $backup_flags > "$MYSQL_BACKUP_DIR/openstack_database.sql"
cp -rdp /etc/my.cnf* "$MYSQL_BACKUP_DIR"
fi
pcs resource disable redis
check_resource redis stopped 600
pcs resource disable rabbitmq
check_resource rabbitmq stopped 600
pcs resource disable galera
check_resource galera stopped 600
pcs resource disable openstack-cinder-volume
check_resource openstack-cinder-volume stopped 600
# Disable all VIPs before stopping the cluster, so that pcs doesn't use one as a source address:
# https://bugzilla.redhat.com/show_bug.cgi?id=1330688
for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Started | awk '{ print $1 }'); do
pcs resource disable $vip
check_resource $vip stopped 60
done
pcs cluster stop --all
fi
# Swift isn't controlled by pacemaker
systemctl_swift stop
tstart=$(date +%s)
while systemctl is-active pacemaker; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_sync_timeout )) ; then
echo_error "ERROR: cluster shutdown timed out"
exit 1
fi
done
# The reason we do an sql dump *and* move the old dir out of
# the way is that it gives us an extra level of safety in case
# something goes wrong during the upgrade. Once the restore is
# successful we go ahead and remove it. If the directory exists
# we bail out as it means the upgrade process had issues in the last
# run.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
if [ -d $MYSQL_TEMP_UPGRADE_BACKUP_DIR ]; then
echo_error "ERROR: mysql backup dir already exist"
exit 1
fi
mv /var/lib/mysql $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
# Special-case OVS for https://bugs.launchpad.net/tripleo/+bug/1635205
if [[ -n $(rpm -q --scripts openvswitch | awk '/postuninstall/,/*/' | grep "systemctl.*try-restart") ]]; then
echo "Manual upgrade of openvswitch - restart in postun detected"
mkdir OVS_UPGRADE || true
pushd OVS_UPGRADE
echo "Attempting to downloading latest openvswitch with yumdownloader"
yumdownloader --resolve openvswitch
echo "Updating openvswitch with nopostun option"
rpm -U --replacepkgs --nopostun ./*.rpm
popd
else
echo "Skipping manual upgrade of openvswitch - no restart in postun detected"
fi
yum -y install python-zaqarclient # needed for os-collect-config
yum -y -q update
# We need to ensure at least those two configuration settings, otherwise
# mariadb 10.1+ won't activate galera replication.
# wsrep_cluster_address must only be set though, its value does not
# matter because it's overridden by the galera resource agent.
cat >> /etc/my.cnf.d/galera.cnf <<EOF
[mysqld]
wsrep_on = ON
wsrep_cluster_address = gcomm://localhost
EOF
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
# Scripts run via heat have no HOME variable set and this confuses
# mysqladmin
export HOME=/root
mkdir /var/lib/mysql || /bin/true
chown mysql:mysql /var/lib/mysql
chmod 0755 /var/lib/mysql
restorecon -R /var/lib/mysql/
mysql_install_db --datadir=/var/lib/mysql --user=mysql
chown -R mysql:mysql /var/lib/mysql/
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
mysqld_safe --wsrep-new-cluster &
# We have a populated /root/.my.cnf with root/password here so
# we need to temporarily rename it because the newly created
# db is empty and no root password is set
mv /root/.my.cnf /root/.my.cnf.temporary
timeout 60 sh -c 'while ! mysql -e "" &> /dev/null; do sleep 1; done'
mysql -u root < "$MYSQL_BACKUP_DIR/openstack_database.sql"
mv /root/.my.cnf.temporary /root/.my.cnf
mysqladmin -u root shutdown
# The import was successful so we may remove the folder
rm -r "$MYSQL_BACKUP_DIR"
fi
fi
# If we reached here without error we can safely blow away the original
# mysql dir from every controller
# TODO: What if the upgrade fails on the bootstrap node, but not on
# this controller. Data may be lost.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
rm -r $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
# Let's reset the stonith back to true if it was true, before starting the cluster
if [[ -n $(is_bootstrap_node) ]]; then
if [ $STONITH_STATE == "true" ]; then
pcs -f /var/lib/pacemaker/cib/cib.xml property set stonith-enabled=true
fi
fi
# Pin messages sent to compute nodes to kilo, these will be upgraded later
crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute"
# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
# LP: 1615035, required only for M/N upgrade.
crudini --set /etc/nova/nova.conf DEFAULT scheduler_host_manager host_manager
# LP: 1627450, required only for M/N upgrade
crudini --set /etc/nova/nova.conf DEFAULT scheduler_driver filter_scheduler
crudini --set /etc/sahara/sahara.conf DEFAULT plugins ambari,cdh,mapr,vanilla,spark,storm

extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh

@@ -2,67 +2,186 @@
set -eu
cluster_form_timeout=600
cluster_settle_timeout=1800
galera_sync_timeout=600
cluster_sync_timeout=1800
# After migrating the cluster to HA-NG the services not under pacemaker's control
# are still up and running. We need to stop them explicitly, otherwise during the yum
# upgrade the rpm %post sections will try to do a systemctl try-restart <service>, which
# is going to take a long time because rabbit is down. By having the service stopped
# systemctl try-restart is a noop
for service in $(services_to_migrate); do
manage_systemd_service stop "${service%%-clone}"
# So the reason for not reusing check_resource_systemd is that
# I have observed systemctl is-active returning unknown with at least
# one service that was stopped (See LP 1627254)
timeout=600
tstart=$(date +%s)
tend=$(( $tstart + $timeout ))
check_interval=3
while (( $(date +%s) < $tend )); do
if [[ "$(systemctl is-active ${service%%-clone})" = "active" ]]; then
echo "$service still active, sleeping $check_interval seconds."
sleep $check_interval
else
# we do not care if it is inactive, unknown or failed as long as it is
# not running
break
fi
done
done
# In case the mysql package is updated, the database on disk must be
# upgraded as well. This typically needs to happen during major
# version upgrades (e.g. 5.5 -> 5.6, 5.5 -> 10.1...)
#
# Because in-place upgrades are not supported across 2+ major versions
# (e.g. 5.5 -> 10.1), we rely on logical upgrades via a dump/restore cycle
# https://bugzilla.redhat.com/show_bug.cgi?id=1341968
#
# The default is to determine automatically if upgrade is needed based
# on mysql package versioning, but this can be overridden manually
# to support specific upgrade scenarios
# Calling this function will set the DO_MYSQL_UPGRADE variable which is used
# later
mysql_need_update
if [[ -n $(is_bootstrap_node) ]]; then
pcs cluster start --all
tstart=$(date +%s)
while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_form_timeout )) ; then
echo_error "ERROR: timed out forming the cluster"
exit 1
fi
done
if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
echo_error "ERROR: timed out waiting for cluster to finish transition"
exit 1
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
mysqldump $backup_flags > "$MYSQL_BACKUP_DIR/openstack_database.sql"
cp -rdp /etc/my.cnf* "$MYSQL_BACKUP_DIR"
fi
for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Stopped | awk '{ print $1 }'); do
pcs resource enable $vip
check_resource_pacemaker $vip started 60
pcs resource disable redis
check_resource redis stopped 600
pcs resource disable rabbitmq
check_resource rabbitmq stopped 600
pcs resource disable galera
check_resource galera stopped 600
pcs resource disable openstack-cinder-volume
check_resource openstack-cinder-volume stopped 600
# Disable all VIPs before stopping the cluster, so that pcs doesn't use one as a source address:
# https://bugzilla.redhat.com/show_bug.cgi?id=1330688
for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Started | awk '{ print $1 }'); do
pcs resource disable $vip
check_resource $vip stopped 60
done
pcs cluster stop --all
fi
start_or_enable_service galera
check_resource galera started 600
start_or_enable_service redis
check_resource redis started 600
# We need mongod, which is now a systemd service, up and running before calling
# ceilometer-dbsync. There is still a race here: mongod might not be up on all nodes
# so ceilometer-dbsync will fail a couple of times before that. As it retries indefinitely
# we should be good.
# Due to LP Bug https://bugs.launchpad.net/tripleo/+bug/1627254 we are using systemctl directly for now
systemctl start mongod
check_resource mongod started 600
# Swift isn't controlled by pacemaker
systemctl_swift stop
tstart=$(date +%s)
while systemctl is-active pacemaker; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_sync_timeout )) ; then
echo_error "ERROR: cluster shutdown timed out"
exit 1
fi
done
# The reason we do an sql dump *and* move the old dir out of
# the way is that it gives us an extra level of safety in case
# something goes wrong during the upgrade. Once the restore is
# successful we go ahead and remove it. If the directory exists
# we bail out as it means the upgrade process had issues in the last
# run.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
if [ -d $MYSQL_TEMP_UPGRADE_BACKUP_DIR ]; then
echo_error "ERROR: mysql backup dir already exist"
exit 1
fi
mv /var/lib/mysql $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
# Special-case OVS for https://bugs.launchpad.net/tripleo/+bug/1635205
if [[ -n $(rpm -q --scripts openvswitch | awk '/postuninstall/,/*/' | grep "systemctl.*try-restart") ]]; then
echo "Manual upgrade of openvswitch - restart in postun detected"
mkdir OVS_UPGRADE || true
pushd OVS_UPGRADE
echo "Attempting to downloading latest openvswitch with yumdownloader"
yumdownloader --resolve openvswitch
echo "Updating openvswitch with nopostun option"
rpm -U --replacepkgs --nopostun ./*.rpm
popd
else
echo "Skipping manual upgrade of openvswitch - no restart in postun detected"
fi
yum -y install python-zaqarclient # needed for os-collect-config
yum -y -q update
# We need to ensure at least those two configuration settings, otherwise
# mariadb 10.1+ won't activate galera replication.
# wsrep_cluster_address must only be set though, its value does not
# matter because it's overridden by the galera resource agent.
cat >> /etc/my.cnf.d/galera.cnf <<EOF
[mysqld]
wsrep_on = ON
wsrep_cluster_address = gcomm://localhost
EOF
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
# Scripts run via heat have no HOME variable set and this confuses
# mysqladmin
export HOME=/root
mkdir /var/lib/mysql || /bin/true
chown mysql:mysql /var/lib/mysql
chmod 0755 /var/lib/mysql
restorecon -R /var/lib/mysql/
mysql_install_db --datadir=/var/lib/mysql --user=mysql
chown -R mysql:mysql /var/lib/mysql/
if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
mysqld_safe --wsrep-new-cluster &
# We have a populated /root/.my.cnf with root/password here so
# we need to temporarily rename it because the newly created
# db is empty and no root password is set
mv /root/.my.cnf /root/.my.cnf.temporary
timeout 60 sh -c 'while ! mysql -e "" &> /dev/null; do sleep 1; done'
mysql -u root < "$MYSQL_BACKUP_DIR/openstack_database.sql"
mv /root/.my.cnf.temporary /root/.my.cnf
mysqladmin -u root shutdown
# The import was successful so we may remove the folder
rm -r "$MYSQL_BACKUP_DIR"
fi
fi
# If we reached here without error we can safely blow away the original
# mysql dir from every controller
# TODO: What if the upgrade fails on the bootstrap node, but not on
# this controller. Data may be lost.
if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
rm -r $MYSQL_TEMP_UPGRADE_BACKUP_DIR
fi
# Let's reset the stonith back to true if it was true, before starting the cluster
if [[ -n $(is_bootstrap_node) ]]; then
tstart=$(date +%s)
while ! clustercheck; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > galera_sync_timeout )) ; then
echo_error "ERROR galera sync timed out"
exit 1
fi
done
# Run all the db syncs
# TODO: check if this can be triggered in puppet and removed from here
ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
cinder-manage db sync
glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
heat-manage --config-file /etc/heat/heat.conf db_sync
keystone-manage db_sync
neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
nova-manage db sync
nova-manage api_db sync
nova-manage db online_data_migrations
sahara-db-manage --config-file /etc/sahara/sahara.conf upgrade head
if [ -f /var/tmp/stonith-true ]; then
pcs -f /var/lib/pacemaker/cib/cib.xml property set stonith-enabled=true
fi
rm -f /var/tmp/stonith-true
fi
# Pin messages sent to compute nodes to kilo, these will be upgraded later
crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute"
# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
# LP: 1615035, required only for M/N upgrade.
crudini --set /etc/nova/nova.conf DEFAULT scheduler_host_manager host_manager
# LP: 1627450, required only for M/N upgrade
crudini --set /etc/nova/nova.conf DEFAULT scheduler_driver filter_scheduler
crudini --set /etc/sahara/sahara.conf DEFAULT plugins ambari,cdh,mapr,vanilla,spark,storm

extraconfig/tasks/major_upgrade_controller_pacemaker_3.sh

@@ -2,16 +2,67 @@
set -eu
start_or_enable_service rabbitmq
check_resource rabbitmq started 600
cluster_form_timeout=600
cluster_settle_timeout=1800
galera_sync_timeout=600
if [[ -n $(is_bootstrap_node) ]]; then
pcs cluster start --all
tstart=$(date +%s)
while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_form_timeout )) ; then
echo_error "ERROR: timed out forming the cluster"
exit 1
fi
done
if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
echo_error "ERROR: timed out waiting for cluster to finish transition"
exit 1
fi
for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Stopped | awk '{ print $1 }'); do
pcs resource enable $vip
check_resource_pacemaker $vip started 60
done
fi
start_or_enable_service galera
check_resource galera started 600
start_or_enable_service redis
check_resource redis started 600
start_or_enable_service openstack-cinder-volume
check_resource openstack-cinder-volume started 600
# We need mongod, which is now a systemd service, up and running before calling
# ceilometer-dbsync. There is still a race here: mongod might not be up on all nodes
# so ceilometer-dbsync will fail a couple of times before that. As it retries indefinitely
# we should be good.
# Due to LP Bug https://bugs.launchpad.net/tripleo/+bug/1627254 we are using systemctl directly for now
systemctl start mongod
check_resource mongod started 600
# start httpd so keystone is available for gnocchi
# upgrade to run.
systemctl start httpd
if [[ -n $(is_bootstrap_node) ]]; then
tstart=$(date +%s)
while ! clustercheck; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > galera_sync_timeout )) ; then
echo_error "ERROR galera sync timed out"
exit 1
fi
done
# Swift isn't controlled by pacemaker
systemctl_swift start
# Run all the db syncs
# TODO: check if this can be triggered in puppet and removed from here
ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
cinder-manage db sync
glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
heat-manage --config-file /etc/heat/heat.conf db_sync
keystone-manage db_sync
neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
nova-manage db sync
nova-manage api_db sync
nova-manage db online_data_migrations
sahara-db-manage --config-file /etc/sahara/sahara.conf upgrade head
fi

extraconfig/tasks/major_upgrade_controller_pacemaker_4.sh

@@ -2,7 +2,16 @@
set -eu
if [[ -n $(is_bootstrap_node) ]]; then
# run gnocchi upgrade
gnocchi-upgrade
fi
start_or_enable_service rabbitmq
check_resource rabbitmq started 600
start_or_enable_service redis
check_resource redis started 600
start_or_enable_service openstack-cinder-volume
check_resource openstack-cinder-volume started 600
# start httpd so keystone is available for gnocchi
# upgrade to run.
systemctl start httpd
# Swift isn't controlled by pacemaker
systemctl_swift start

extraconfig/tasks/major_upgrade_controller_pacemaker_5.sh

@@ -2,14 +2,7 @@
set -eu
# We need to start the systemd services we explicitly stopped at step _1.sh
# FIXME: Should we let puppet during the convergence step do the service enabling or
# should we add it here?
services=$(services_to_migrate)
if [[ ${keep_sahara_services_on_upgrade} =~ [Ff]alse ]] ; then
services=${services%%openstack-sahara*}
if [[ -n $(is_bootstrap_node) ]]; then
# run gnocchi upgrade
gnocchi-upgrade
fi
for service in $services; do
manage_systemd_service start "${service%%-clone}"
check_resource_systemd "${service%%-clone}" started 600
done

extraconfig/tasks/major_upgrade_controller_pacemaker_6.sh

@@ -0,0 +1,15 @@
#!/bin/bash
set -eu
# We need to start the systemd services we explicitly stopped at step _1.sh
# FIXME: Should we let puppet during the convergence step do the service enabling or
# should we add it here?
services=$(services_to_migrate)
if [[ ${keep_sahara_services_on_upgrade} =~ [Ff]alse ]] ; then
services=${services%%openstack-sahara*}
fi
for service in $services; do
manage_systemd_service start "${service%%-clone}"
check_resource_systemd "${service%%-clone}" started 600
done

extraconfig/tasks/major_upgrade_pacemaker.yaml

@@ -113,7 +113,20 @@ resources:
config:
list_join:
- ''
- - get_file: pacemaker_common_functions.sh
- - str_replace:
template: |
#!/bin/bash
upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE'
params:
UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute}
- str_replace:
template: |
#!/bin/bash
mariadb_do_major_upgrade='MYSQL_MAJOR_UPGRADE'
params:
MYSQL_MAJOR_UPGRADE: {get_param: MySqlMajorUpgrade}
- get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_check.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_2.sh
@@ -170,13 +183,7 @@ resources:
config:
list_join:
- ''
- - str_replace:
template: |
#!/bin/bash
keep_sahara_services_on_upgrade='KEEP_SAHARA_SERVICES_ON_UPGRADE'
params:
KEEP_SAHARA_SERVICES_ON_UPGRADE: {get_param: KeepSaharaServicesOnUpgrade}
- get_file: pacemaker_common_functions.sh
- - get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_5.sh
@@ -187,3 +194,28 @@ resources:
servers: {get_param: [servers, Controller]}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step5}
input_values: {get_param: input_values}
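# New final step: start the systemd services that were stopped in step 2,
# once the cluster is back up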
ControllerPacemakerUpgradeConfig_Step6:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
list_join:
- ''
- - str_replace:
template: |
#!/bin/bash
keep_sahara_services_on_upgrade='KEEP_SAHARA_SERVICES_ON_UPGRADE'
params:
KEEP_SAHARA_SERVICES_ON_UPGRADE: {get_param: KeepSaharaServicesOnUpgrade}
- get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_6.sh
ControllerPacemakerUpgradeDeployment_Step6:
type: OS::Heat::SoftwareDeploymentGroup
depends_on: ControllerPacemakerUpgradeDeployment_Step5
properties:
servers: {get_param: [servers, Controller]}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step6}
input_values: {get_param: input_values}